summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c9
-rw-r--r--fs/btrfs/ioctl.c30
-rw-r--r--fs/dlm/debug_fs.c21
-rw-r--r--fs/dlm/dlm_internal.h8
-rw-r--r--fs/dlm/lowcomms.c18
-rw-r--r--fs/dlm/main.c5
-rw-r--r--fs/efivarfs/file.c26
-rw-r--r--fs/ext2/ioctl.c16
-rw-r--r--fs/ext4/ioctl.c51
-rw-r--r--fs/f2fs/checkpoint.c107
-rw-r--r--fs/f2fs/data.c249
-rw-r--r--fs/f2fs/debug.c7
-rw-r--r--fs/f2fs/dir.c16
-rw-r--r--fs/f2fs/extent_cache.c7
-rw-r--r--fs/f2fs/f2fs.h129
-rw-r--r--fs/f2fs/file.c302
-rw-r--r--fs/f2fs/gc.c196
-rw-r--r--fs/f2fs/inline.c16
-rw-r--r--fs/f2fs/inode.c78
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/node.c38
-rw-r--r--fs/f2fs/recovery.c43
-rw-r--r--fs/f2fs/segment.c170
-rw-r--r--fs/f2fs/segment.h16
-rw-r--r--fs/f2fs/super.c610
-rw-r--r--fs/f2fs/sysfs.c22
-rw-r--r--fs/f2fs/xattr.c10
-rw-r--r--fs/gfs2/file.c42
-rw-r--r--fs/hfsplus/ioctl.c21
-rw-r--r--fs/inode.c86
-rw-r--r--fs/io_uring.c338
-rw-r--r--fs/jfs/ioctl.c22
-rw-r--r--fs/nilfs2/ioctl.c9
-rw-r--r--fs/ocfs2/ioctl.c13
-rw-r--r--fs/orangefs/file.c37
-rw-r--r--fs/reiserfs/ioctl.c10
-rw-r--r--fs/splice.c8
-rw-r--r--fs/ubifs/ioctl.c13
-rw-r--r--fs/xfs/Makefile9
-rw-r--r--fs/xfs/kmem.c5
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag.c100
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c8
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c227
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c15
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c14
-rw-r--r--fs/xfs/libxfs/xfs_bit.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c19
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c49
-rw-r--r--fs/xfs/libxfs/xfs_btree.h14
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c3
-rw-r--r--fs/xfs/libxfs/xfs_defer.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c5
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c10
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h124
-rw-r--r--fs/xfs/libxfs/xfs_health.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c245
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h18
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c56
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c6
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c8
-rw-r--r--fs/xfs/libxfs/xfs_sb.c39
-rw-r--r--fs/xfs/libxfs/xfs_shared.h49
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c10
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c17
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h7
-rw-r--r--fs/xfs/libxfs/xfs_types.c13
-rw-r--r--fs/xfs/scrub/agheader.c11
-rw-r--r--fs/xfs/scrub/agheader_repair.c5
-rw-r--r--fs/xfs/scrub/alloc.c7
-rw-r--r--fs/xfs/scrub/attr.c122
-rw-r--r--fs/xfs/scrub/attr.h71
-rw-r--r--fs/xfs/scrub/bitmap.c5
-rw-r--r--fs/xfs/scrub/bmap.c8
-rw-r--r--fs/xfs/scrub/btree.c7
-rw-r--r--fs/xfs/scrub/common.c8
-rw-r--r--fs/xfs/scrub/dabtree.c8
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/fscounters.c12
-rw-r--r--fs/xfs/scrub/health.c8
-rw-r--r--fs/xfs/scrub/ialloc.c28
-rw-r--r--fs/xfs/scrub/inode.c10
-rw-r--r--fs/xfs/scrub/parent.c8
-rw-r--r--fs/xfs/scrub/quota.c13
-rw-r--r--fs/xfs/scrub/refcount.c10
-rw-r--r--fs/xfs/scrub/repair.c14
-rw-r--r--fs/xfs/scrub/rmap.c9
-rw-r--r--fs/xfs/scrub/rtbitmap.c7
-rw-r--r--fs/xfs/scrub/scrub.c20
-rw-r--r--fs/xfs/scrub/symlink.c8
-rw-r--r--fs/xfs/scrub/trace.c6
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_aops.c121
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_attr_inactive.c7
-rw-r--r--fs/xfs/xfs_attr_list.c7
-rw-r--r--fs/xfs/xfs_bio_io.c61
-rw-r--r--fs/xfs/xfs_bmap_item.c350
-rw-r--r--fs/xfs/xfs_bmap_item.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c11
-rw-r--r--fs/xfs/xfs_buf.c171
-rw-r--r--fs/xfs/xfs_buf.h53
-rw-r--r--fs/xfs/xfs_buf_item.c40
-rw-r--r--fs/xfs/xfs_buf_item.h6
-rw-r--r--fs/xfs/xfs_dir2_readdir.c5
-rw-r--r--fs/xfs/xfs_discard.c4
-rw-r--r--fs/xfs/xfs_dquot.c6
-rw-r--r--fs/xfs/xfs_dquot.h1
-rw-r--r--fs/xfs/xfs_dquot_item.c118
-rw-r--r--fs/xfs/xfs_dquot_item.h4
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_export.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c410
-rw-r--r--fs/xfs/xfs_extfree_item.h6
-rw-r--r--fs/xfs/xfs_file.c23
-rw-r--r--fs/xfs/xfs_filestream.c5
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_globals.c4
-rw-r--r--fs/xfs/xfs_health.c6
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icreate_item.c75
-rw-r--r--fs/xfs/xfs_inode.c42
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_ioctl.c448
-rw-r--r--fs/xfs/xfs_ioctl.h8
-rw-r--r--fs/xfs/xfs_ioctl32.c161
-rw-r--r--fs/xfs/xfs_ioctl32.h14
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_iops.c10
-rw-r--r--fs/xfs/xfs_itable.c749
-rw-r--r--fs/xfs/xfs_itable.h106
-rw-r--r--fs/xfs/xfs_iwalk.c720
-rw-r--r--fs/xfs/xfs_iwalk.h46
-rw-r--r--fs/xfs/xfs_linux.h5
-rw-r--r--fs/xfs/xfs_log.c644
-rw-r--r--fs/xfs/xfs_log.h17
-rw-r--r--fs/xfs/xfs_log_cil.c51
-rw-r--r--fs/xfs/xfs_log_priv.h36
-rw-r--r--fs/xfs/xfs_log_recover.c463
-rw-r--r--fs/xfs/xfs_message.c2
-rw-r--r--fs/xfs/xfs_mount.c102
-rw-r--r--fs/xfs/xfs_mount.h22
-rw-r--r--fs/xfs/xfs_ondisk.h5
-rw-r--r--fs/xfs/xfs_pnfs.c9
-rw-r--r--fs/xfs/xfs_pwork.c136
-rw-r--r--fs/xfs/xfs_pwork.h61
-rw-r--r--fs/xfs/xfs_qm.c68
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c5
-rw-r--r--fs/xfs/xfs_quotaops.c3
-rw-r--r--fs/xfs/xfs_refcount_item.c357
-rw-r--r--fs/xfs/xfs_refcount_item.h2
-rw-r--r--fs/xfs/xfs_reflink.c15
-rw-r--r--fs/xfs/xfs_rmap_item.c380
-rw-r--r--fs/xfs/xfs_rmap_item.h2
-rw-r--r--fs/xfs/xfs_rtalloc.c6
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_super.c32
-rw-r--r--fs/xfs/xfs_super.h14
-rw-r--r--fs/xfs/xfs_symlink.c9
-rw-r--r--fs/xfs/xfs_sysctl.c3
-rw-r--r--fs/xfs/xfs_sysctl.h3
-rw-r--r--fs/xfs/xfs_sysfs.c42
-rw-r--r--fs/xfs/xfs_trace.c8
-rw-r--r--fs/xfs/xfs_trace.h61
-rw-r--r--fs/xfs/xfs_trans.c43
-rw-r--r--fs/xfs/xfs_trans.h70
-rw-r--r--fs/xfs/xfs_trans_ail.c53
-rw-r--r--fs/xfs/xfs_trans_bmap.c232
-rw-r--r--fs/xfs/xfs_trans_buf.c11
-rw-r--r--fs/xfs/xfs_trans_dquot.c11
-rw-r--r--fs/xfs/xfs_trans_extfree.c286
-rw-r--r--fs/xfs/xfs_trans_inode.c3
-rw-r--r--fs/xfs/xfs_trans_priv.h4
-rw-r--r--fs/xfs/xfs_trans_refcount.c240
-rw-r--r--fs/xfs/xfs_trans_rmap.c257
-rw-r--r--fs/xfs/xfs_xattr.c5
198 files changed, 6262 insertions, 5801 deletions
diff --git a/fs/aio.c b/fs/aio.c
index c1e581dd32f5..2d405733a8c6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
return 0;
}
-static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec,
- bool vectored, bool compat, struct iov_iter *iter)
+static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
+ struct iovec **iovec, bool vectored, bool compat,
+ struct iov_iter *iter)
{
void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf;
size_t len = iocb->aio_nbytes;
@@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb,
return -EINVAL;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
@@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
return -EINVAL;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
- if (ret)
+ if (ret < 0)
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 56ae2f659b6d..cfeff1b8dce0 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- unsigned int fsflags;
+ unsigned int fsflags, old_fsflags;
int ret;
const char *comp = NULL;
u32 binode_flags = binode->flags;
@@ -212,13 +212,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode_lock(inode);
fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
- if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) &
- (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
- goto out_unlock;
- }
- }
+ old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
+ ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
+ if (ret)
+ goto out_unlock;
if (fsflags & FS_SYNC_FL)
binode_flags |= BTRFS_INODE_SYNC;
@@ -376,9 +373,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(file_inode(file));
struct fsxattr fa;
- memset(&fa, 0, sizeof(fa));
- fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);
-
+ simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags));
if (copy_to_user(arg, &fa, sizeof(fa)))
return -EFAULT;
@@ -391,7 +386,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
struct btrfs_inode *binode = BTRFS_I(inode);
struct btrfs_root *root = binode->root;
struct btrfs_trans_handle *trans;
- struct fsxattr fa;
+ struct fsxattr fa, old_fa;
unsigned old_flags;
unsigned old_i_flags;
int ret = 0;
@@ -402,7 +397,6 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
if (btrfs_root_readonly(root))
return -EROFS;
- memset(&fa, 0, sizeof(fa));
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
@@ -422,13 +416,11 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
old_flags = binode->flags;
old_i_flags = inode->i_flags;
- /* We need the capabilities to change append-only or immutable inode */
- if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
- (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- ret = -EPERM;
+ simple_fill_fsxattr(&old_fa,
+ btrfs_inode_flags_to_xflags(binode->flags));
+ ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
+ if (ret)
goto out_unlock;
- }
if (fa.fsx_xflags & FS_XFLAG_SYNC)
binode->flags |= BTRFS_INODE_SYNC;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 0e941d42e3e9..d6bbccb0ed15 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -737,7 +737,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
debugfs_remove(ls->ls_debug_toss_dentry);
}
-int dlm_create_debug_file(struct dlm_ls *ls)
+void dlm_create_debug_file(struct dlm_ls *ls)
{
char name[DLM_LOCKSPACE_LEN + 8];
@@ -748,8 +748,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format1_fops);
- if (!ls->ls_debug_rsb_dentry)
- goto fail;
/* format 2 */
@@ -761,8 +759,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format2_fops);
- if (!ls->ls_debug_locks_dentry)
- goto fail;
/* format 3 */
@@ -774,8 +770,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format3_fops);
- if (!ls->ls_debug_all_dentry)
- goto fail;
/* format 4 */
@@ -787,8 +781,6 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&format4_fops);
- if (!ls->ls_debug_toss_dentry)
- goto fail;
memset(name, 0, sizeof(name));
snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name);
@@ -798,21 +790,12 @@ int dlm_create_debug_file(struct dlm_ls *ls)
dlm_root,
ls,
&waiters_fops);
- if (!ls->ls_debug_waiters_dentry)
- goto fail;
-
- return 0;
-
- fail:
- dlm_delete_debug_file(ls);
- return -ENOMEM;
}
-int __init dlm_register_debugfs(void)
+void __init dlm_register_debugfs(void)
{
mutex_init(&debug_buf_lock);
dlm_root = debugfs_create_dir("dlm", NULL);
- return dlm_root ? 0 : -ENOMEM;
}
void dlm_unregister_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index da1173a0b274..416d9de35679 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -719,14 +719,14 @@ int dlm_plock_init(void);
void dlm_plock_exit(void);
#ifdef CONFIG_DLM_DEBUG
-int dlm_register_debugfs(void);
+void dlm_register_debugfs(void);
void dlm_unregister_debugfs(void);
-int dlm_create_debug_file(struct dlm_ls *ls);
+void dlm_create_debug_file(struct dlm_ls *ls);
void dlm_delete_debug_file(struct dlm_ls *ls);
#else
-static inline int dlm_register_debugfs(void) { return 0; }
+static inline void dlm_register_debugfs(void) { }
static inline void dlm_unregister_debugfs(void) { }
-static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; }
+static inline void dlm_create_debug_file(struct dlm_ls *ls) { }
static inline void dlm_delete_debug_file(struct dlm_ls *ls) { }
#endif
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 114ebfe30929..3951d39b9b75 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1628,8 +1628,10 @@ static void clean_writequeues(void)
static void work_stop(void)
{
- destroy_workqueue(recv_workqueue);
- destroy_workqueue(send_workqueue);
+ if (recv_workqueue)
+ destroy_workqueue(recv_workqueue);
+ if (send_workqueue)
+ destroy_workqueue(send_workqueue);
}
static int work_start(void)
@@ -1689,13 +1691,17 @@ static void work_flush(void)
struct hlist_node *n;
struct connection *con;
- flush_workqueue(recv_workqueue);
- flush_workqueue(send_workqueue);
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
do {
ok = 1;
foreach_conn(stop_conn);
- flush_workqueue(recv_workqueue);
- flush_workqueue(send_workqueue);
+ if (recv_workqueue)
+ flush_workqueue(recv_workqueue);
+ if (send_workqueue)
+ flush_workqueue(send_workqueue);
for (i = 0; i < CONN_HASH_SIZE && ok; i++) {
hlist_for_each_entry_safe(con, n,
&connection_hash[i], list) {
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 39579927ed84..afc66a1346d3 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -35,9 +35,7 @@ static int __init init_dlm(void)
if (error)
goto out_lockspace;
- error = dlm_register_debugfs();
- if (error)
- goto out_config;
+ dlm_register_debugfs();
error = dlm_user_init();
if (error)
@@ -61,7 +59,6 @@ static int __init init_dlm(void)
dlm_user_exit();
out_debug:
dlm_unregister_debugfs();
- out_config:
dlm_config_exit();
out_lockspace:
dlm_lockspace_exit();
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index ee3bc0c96b9d..e9e27a271af0 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -107,16 +107,22 @@ out_free:
return size;
}
-static int
-efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+static inline unsigned int efivarfs_getflags(struct inode *inode)
{
- struct inode *inode = file->f_mapping->host;
unsigned int i_flags;
unsigned int flags = 0;
i_flags = inode->i_flags;
if (i_flags & S_IMMUTABLE)
flags |= FS_IMMUTABLE_FL;
+ return flags;
+}
+
+static int
+efivarfs_ioc_getxflags(struct file *file, void __user *arg)
+{
+ struct inode *inode = file->f_mapping->host;
+ unsigned int flags = efivarfs_getflags(inode);
if (copy_to_user(arg, &flags, sizeof(flags)))
return -EFAULT;
@@ -129,6 +135,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
struct inode *inode = file->f_mapping->host;
unsigned int flags;
unsigned int i_flags = 0;
+ unsigned int oldflags = efivarfs_getflags(inode);
int error;
if (!inode_owner_or_capable(inode))
@@ -140,9 +147,6 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
if (flags & ~FS_IMMUTABLE_FL)
return -EOPNOTSUPP;
- if (!capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
if (flags & FS_IMMUTABLE_FL)
i_flags |= S_IMMUTABLE;
@@ -152,12 +156,16 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
return error;
inode_lock(inode);
+
+ error = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (error)
+ goto out;
+
inode_set_flags(inode, i_flags, S_IMMUTABLE);
+out:
inode_unlock(inode);
-
mnt_drop_write_file(file);
-
- return 0;
+ return error;
}
static long
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index 0367c0039e68..1b853fb0b163 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -60,18 +60,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
oldflags = ei->i_flags;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- inode_unlock(inode);
- ret = -EPERM;
- goto setflags_out;
- }
+ ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (ret) {
+ inode_unlock(inode);
+ goto setflags_out;
}
flags = flags & EXT2_FL_USER_MODIFIABLE;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 74648d42c69b..442f7ef873fc 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -312,16 +312,9 @@ static int ext4_ioctl_setflags(struct inode *inode,
/* The JOURNAL_DATA flag is modifiable only by root */
jflag = flags & EXT4_JOURNAL_DATA_FL;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- *
- * This test looks nicer. Thanks to Pauline Middelink
- */
- if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto flags_out;
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto flags_out;
/*
* The JOURNAL_DATA flag can only be changed by
@@ -741,28 +734,15 @@ group_add_out:
return err;
}
-static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
+static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
{
- /*
- * Project Quota ID state is only allowed to change from within the init
- * namespace. Enforce that restriction only if we are trying to change
- * the quota ID state. Everything else is allowed in user namespaces.
- */
- if (current_user_ns() == &init_user_ns)
- return 0;
-
- if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid)
- return -EINVAL;
+ struct ext4_inode_info *ei = EXT4_I(inode);
- if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) {
- if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
- return -EINVAL;
- } else {
- if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
- return -EINVAL;
- }
+ simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags &
+ EXT4_FL_USER_VISIBLE));
- return 0;
+ if (ext4_has_feature_project(inode->i_sb))
+ fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid);
}
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
@@ -1139,13 +1119,7 @@ resizefs_out:
{
struct fsxattr fa;
- memset(&fa, 0, sizeof(struct fsxattr));
- fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE);
-
- if (ext4_has_feature_project(inode->i_sb)) {
- fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
- EXT4_I(inode)->i_projid);
- }
+ ext4_fill_fsxattr(inode, &fa);
if (copy_to_user((struct fsxattr __user *)arg,
&fa, sizeof(fa)))
@@ -1154,7 +1128,7 @@ resizefs_out:
}
case EXT4_IOC_FSSETXATTR:
{
- struct fsxattr fa;
+ struct fsxattr fa, old_fa;
int err;
if (copy_from_user(&fa, (struct fsxattr __user *)arg,
@@ -1177,7 +1151,8 @@ resizefs_out:
return err;
inode_lock(inode);
- err = ext4_ioctl_check_project(inode, &fa);
+ ext4_fill_fsxattr(inode, &old_fa);
+ err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
if (err)
goto out;
flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ed70b68b2b38..a0eef95b9e0e 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
exist = f2fs_test_bit(offset, se->cur_valid_map);
if (!exist && type == DATA_GENERIC_ENHANCE) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "blkaddr:%u, sit bitmap:%d", blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
}
@@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case DATA_GENERIC_ENHANCE_READ:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi))) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "access invalid blkaddr:%u", blkaddr);
+ f2fs_warn(sbi, "access invalid blkaddr:%u",
+ blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
WARN_ON(1);
return false;
@@ -657,9 +657,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
err_out:
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: orphan failed (ino=%x), run fsck to fix.",
- __func__, ino);
+ f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.",
+ __func__, ino);
return err;
}
@@ -676,13 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
return 0;
if (bdev_read_only(sbi->sb->s_bdev)) {
- f2fs_msg(sbi->sb, KERN_INFO, "write access "
- "unavailable, skipping orphan cleanup");
+ f2fs_info(sbi, "write access unavailable, skipping orphan cleanup");
return 0;
}
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
+ f2fs_info(sbi, "orphan cleanup on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
@@ -827,26 +825,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
if (crc_offset < CP_MIN_CHKSUM_OFFSET ||
crc_offset > CP_CHKSUM_OFFSET) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid crc_offset: %zu", crc_offset);
+ f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset);
return -EINVAL;
}
- if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) {
- if (crc_offset != CP_MIN_CHKSUM_OFFSET) {
- f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "layout of large_nat_bitmap is deprecated, "
- "run fsck to repair, chksum_offset: %zu",
- crc_offset);
- return -EINVAL;
- }
- }
-
crc = f2fs_checkpoint_chksum(sbi, *cp_block);
if (crc != cur_cp_crc(*cp_block)) {
f2fs_put_page(*cp_page, 1);
- f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
+ f2fs_warn(sbi, "invalid crc value");
return -EINVAL;
}
@@ -869,9 +855,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
sbi->blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "invalid cp_pack_total_block_count:%u",
- le32_to_cpu(cp_block->cp_pack_total_block_count));
+ f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
+ le32_to_cpu(cp_block->cp_pack_total_block_count));
goto invalid_cp;
}
pre_version = *version;
@@ -905,6 +890,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned int cp_blks = 1 + __cp_payload(sbi);
block_t cp_blk_no;
int i;
+ int err;
sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks),
GFP_KERNEL);
@@ -932,6 +918,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
} else if (cp2) {
cur_page = cp2;
} else {
+ err = -EFSCORRUPTED;
goto fail_no_cp;
}
@@ -944,8 +931,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
sbi->cur_cp_pack = 2;
/* Sanity checking of checkpoint */
- if (f2fs_sanity_check_ckpt(sbi))
+ if (f2fs_sanity_check_ckpt(sbi)) {
+ err = -EFSCORRUPTED;
goto free_fail_no_cp;
+ }
if (cp_blks <= 1)
goto done;
@@ -959,8 +948,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi)
unsigned char *ckpt = (unsigned char *)sbi->ckpt;
cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i);
- if (IS_ERR(cur_page))
+ if (IS_ERR(cur_page)) {
+ err = PTR_ERR(cur_page);
goto free_fail_no_cp;
+ }
sit_bitmap_ptr = page_address(cur_page);
memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
f2fs_put_page(cur_page, 1);
@@ -975,7 +966,7 @@ free_fail_no_cp:
f2fs_put_page(cp2, 1);
fail_no_cp:
kvfree(sbi->ckpt);
- return -EINVAL;
+ return err;
}
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
@@ -1142,17 +1133,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
static bool __need_flush_quota(struct f2fs_sb_info *sbi)
{
+ bool ret = false;
+
if (!is_journalled_quota(sbi))
return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
- return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
- return false;
- if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
- return true;
- if (get_pages(sbi, F2FS_DIRTY_QDATA))
- return true;
- return false;
+
+ down_write(&sbi->quota_sem);
+ if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+ ret = false;
+ } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) {
+ clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ ret = true;
+ } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
+ ret = true;
+ }
+ up_write(&sbi->quota_sem);
+ return ret;
}
/*
@@ -1171,26 +1169,22 @@ static int block_operations(struct f2fs_sb_info *sbi)
blk_start_plug(&plug);
retry_flush_quotas:
+ f2fs_lock_all(sbi);
if (__need_flush_quota(sbi)) {
int locked;
if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
- f2fs_lock_all(sbi);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
goto retry_flush_dents;
}
- clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ f2fs_unlock_all(sbi);
/* only failed during mount/umount/freeze/quotactl */
locked = down_read_trylock(&sbi->sb->s_umount);
f2fs_quota_sync(sbi->sb, -1);
if (locked)
up_read(&sbi->sb->s_umount);
- }
-
- f2fs_lock_all(sbi);
- if (__need_flush_quota(sbi)) {
- f2fs_unlock_all(sbi);
cond_resched();
goto retry_flush_quotas;
}
@@ -1212,12 +1206,6 @@ retry_flush_dents:
*/
down_write(&sbi->node_change);
- if (__need_flush_quota(sbi)) {
- up_write(&sbi->node_change);
- f2fs_unlock_all(sbi);
- goto retry_flush_quotas;
- }
-
if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
up_write(&sbi->node_change);
f2fs_unlock_all(sbi);
@@ -1313,7 +1301,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
- if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
+ is_sbi_flag_set(sbi, SBI_IS_RESIZEFS))
__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
@@ -1328,10 +1317,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
- /*
- * TODO: we count on fsck.f2fs to clear this flag until we figure out
- * missing cases which clear it incorrectly.
- */
+ else
+ __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
@@ -1571,8 +1558,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
if (cpc->reason != CP_PAUSE)
return 0;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Start checkpoint disabled!");
+ f2fs_warn(sbi, "Start checkpoint disabled!");
}
mutex_lock(&sbi->cp_mutex);
@@ -1638,8 +1624,7 @@ stop:
stat_inc_cp_count(sbi->stat_info);
if (cpc->reason & CP_RECOVERY)
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "checkpoint: version = %llx", ckpt_ver);
+ f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver);
/* do checkpoint periodically */
f2fs_update_time(sbi, CP_TIME);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a546ac8685ea..0ca530afc684 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -14,6 +14,7 @@
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
+#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
@@ -54,7 +55,7 @@ static bool __is_cp_guaranteed(struct page *page)
static enum count_type __read_io_type(struct page *page)
{
- struct address_space *mapping = page->mapping;
+ struct address_space *mapping = page_file_mapping(page);
if (mapping) {
struct inode *inode = mapping->host;
@@ -347,20 +348,20 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
io->bio = NULL;
}
-static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+static bool __has_merged_page(struct bio *bio, struct inode *inode,
struct page *page, nid_t ino)
{
struct bio_vec *bvec;
struct page *target;
struct bvec_iter_all iter_all;
- if (!io->bio)
+ if (!bio)
return false;
if (!inode && !page && !ino)
return true;
- bio_for_each_segment_all(bvec, io->bio, iter_all) {
+ bio_for_each_segment_all(bvec, bio, iter_all) {
target = bvec->bv_page;
if (fscrypt_is_bounce_page(target))
@@ -410,7 +411,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
down_read(&io->io_rwsem);
- ret = __has_merged_page(io, inode, page, ino);
+ ret = __has_merged_page(io->bio, inode, page, ino);
up_read(&io->io_rwsem);
}
if (ret)
@@ -454,7 +455,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
fio->is_por ? META_POR : (__is_meta_io(fio) ?
META_GENERIC : DATA_GENERIC_ENHANCE)))
- return -EFAULT;
+ return -EFSCORRUPTED;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
@@ -480,6 +481,61 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
return 0;
}
+int f2fs_merge_page_bio(struct f2fs_io_info *fio)
+{
+ struct bio *bio = *fio->bio;
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
+
+ if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
+ return -EFSCORRUPTED;
+
+ trace_f2fs_submit_page_bio(page, fio);
+ f2fs_trace_ios(fio, 0);
+
+ if (bio && (*fio->last_block + 1 != fio->new_blkaddr ||
+ !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ }
+alloc_new:
+ if (!bio) {
+ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
+ BIO_MAX_PAGES, false, fio->type, fio->temp);
+ bio_set_op_attrs(bio, fio->op, fio->op_flags);
+ }
+
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ __submit_bio(fio->sbi, bio, fio->type);
+ bio = NULL;
+ goto alloc_new;
+ }
+
+ if (fio->io_wbc)
+ wbc_account_io(fio->io_wbc, page, PAGE_SIZE);
+
+ inc_page_count(fio->sbi, WB_DATA_TYPE(page));
+
+ *fio->last_block = fio->new_blkaddr;
+ *fio->bio = bio;
+
+ return 0;
+}
+
+static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
+ struct page *page)
+{
+ if (!bio)
+ return;
+
+ if (!__has_merged_page(*bio, NULL, page, 0))
+ return;
+
+ __submit_bio(sbi, *bio, DATA);
+ *bio = NULL;
+}
+
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
@@ -733,7 +789,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_err;
}
goto got_it;
@@ -753,7 +809,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
dn.data_blkaddr,
DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_err;
}
got_it:
@@ -1099,7 +1155,7 @@ next_block:
if (__is_valid_data_blkaddr(blkaddr) &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto sync_out;
}
@@ -1529,7 +1585,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
sector_t block_nr;
int ret = 0;
- block_in_file = (sector_t)page->index;
+ block_in_file = (sector_t)page_index(page);
last_block = block_in_file + nr_pages;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
blkbits;
@@ -1562,14 +1618,15 @@ got_it:
block_nr = map->m_pblk + block_in_file - map->m_lblk;
SetPageMappedToDisk(page);
- if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ if (!PageUptodate(page) && (!PageSwapCache(page) &&
+ !cleancache_get_page(page))) {
SetPageUptodate(page);
goto confused;
}
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
- ret = -EFAULT;
+ ret = -EFSCORRUPTED;
goto out;
}
} else {
@@ -1660,7 +1717,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
prefetchw(&page->flags);
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
- page->index,
+ page_index(page),
readahead_gfp_mask(mapping)))
goto next_page;
}
@@ -1684,7 +1741,7 @@ next_page:
static int f2fs_read_data_page(struct file *file, struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int ret = -EAGAIN;
trace_f2fs_readpage(page, DATA);
@@ -1693,7 +1750,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
+ ret = f2fs_mpage_readpages(page_file_mapping(page),
+ NULL, page, 1, false);
return ret;
}
@@ -1851,7 +1909,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE))
- return -EFAULT;
+ return -EFSCORRUPTED;
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -1878,7 +1936,7 @@ got_it:
if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto out_writepage;
}
/*
@@ -1946,6 +2004,8 @@ out:
}
static int __write_data_page(struct page *page, bool *submitted,
+ struct bio **bio,
+ sector_t *last_block,
struct writeback_control *wbc,
enum iostat_type io_type)
{
@@ -1971,6 +2031,8 @@ static int __write_data_page(struct page *page, bool *submitted,
.need_lock = LOCK_RETRY,
.io_type = io_type,
.io_wbc = wbc,
+ .bio = bio,
+ .last_block = last_block,
};
trace_f2fs_writepage(page, DATA);
@@ -2069,10 +2131,13 @@ out:
unlock_page(page);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
- !F2FS_I(inode)->cp_task)
+ !F2FS_I(inode)->cp_task) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_balance_fs(sbi, need_balance_fs);
+ }
if (unlikely(f2fs_cp_error(sbi))) {
+ f2fs_submit_ipu_bio(sbi, bio, page);
f2fs_submit_merged_write(sbi, DATA);
submitted = NULL;
}
@@ -2099,7 +2164,7 @@ redirty_out:
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
- return __write_data_page(page, NULL, wbc, FS_DATA_IO);
+ return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}
/*
@@ -2115,6 +2180,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
int done = 0;
struct pagevec pvec;
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
+ struct bio *bio = NULL;
+ sector_t last_block;
int nr_pages;
pgoff_t uninitialized_var(writeback_index);
pgoff_t index;
@@ -2191,17 +2258,20 @@ continue_unlock:
}
if (PageWriteback(page)) {
- if (wbc->sync_mode != WB_SYNC_NONE)
+ if (wbc->sync_mode != WB_SYNC_NONE) {
f2fs_wait_on_page_writeback(page,
DATA, true, true);
- else
+ f2fs_submit_ipu_bio(sbi, &bio, page);
+ } else {
goto continue_unlock;
+ }
}
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
- ret = __write_data_page(page, &submitted, wbc, io_type);
+ ret = __write_data_page(page, &submitted, &bio,
+ &last_block, wbc, io_type);
if (unlikely(ret)) {
/*
* keep nr_to_write, since vfs uses this to
@@ -2250,6 +2320,9 @@ continue_unlock:
if (nwritten)
f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
NULL, 0, DATA);
+ /* submit cached bio of IPU write */
+ if (bio)
+ __submit_bio(sbi, bio, DATA);
return ret;
}
@@ -2261,6 +2334,9 @@ static inline bool __should_serialize_io(struct inode *inode,
return false;
if (IS_NOQUOTA(inode))
return false;
+ /* to avoid deadlock in path of data flush */
+ if (F2FS_I(inode)->cp_task)
+ return false;
if (wbc->sync_mode != WB_SYNC_ALL)
return true;
if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2532,7 +2608,7 @@ repeat:
} else {
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto fail;
}
err = f2fs_submit_page_read(inode, page, blkaddr);
@@ -2777,13 +2853,14 @@ int f2fs_release_page(struct page *page, gfp_t wait)
static int f2fs_set_data_page_dirty(struct page *page)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
trace_f2fs_set_page_dirty(page, DATA);
if (!PageUptodate(page))
SetPageUptodate(page);
+ if (PageSwapCache(page))
+ return __set_page_dirty_nobuffers(page);
if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
@@ -2875,6 +2952,126 @@ int f2fs_migrate_page(struct address_space *mapping,
}
#endif
+#ifdef CONFIG_SWAP
+/* Copied from generic_swapfile_activate() to check any holes */
+static int check_swap_activate(struct file *swap_file, unsigned int max)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ unsigned blocks_per_page;
+ unsigned long page_no;
+ unsigned blkbits;
+ sector_t probe_block;
+ sector_t last_block;
+ sector_t lowest_block = -1;
+ sector_t highest_block = 0;
+
+ blkbits = inode->i_blkbits;
+ blocks_per_page = PAGE_SIZE >> blkbits;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ probe_block = 0;
+ page_no = 0;
+ last_block = i_size_read(inode) >> blkbits;
+ while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
+ unsigned block_in_page;
+ sector_t first_block;
+
+ cond_resched();
+
+ first_block = bmap(inode, probe_block);
+ if (first_block == 0)
+ goto bad_bmap;
+
+ /*
+ * It must be PAGE_SIZE aligned on-disk
+ */
+ if (first_block & (blocks_per_page - 1)) {
+ probe_block++;
+ goto reprobe;
+ }
+
+ for (block_in_page = 1; block_in_page < blocks_per_page;
+ block_in_page++) {
+ sector_t block;
+
+ block = bmap(inode, probe_block + block_in_page);
+ if (block == 0)
+ goto bad_bmap;
+ if (block != first_block + block_in_page) {
+ /* Discontiguity */
+ probe_block++;
+ goto reprobe;
+ }
+ }
+
+ first_block >>= (PAGE_SHIFT - blkbits);
+ if (page_no) { /* exclude the header page */
+ if (first_block < lowest_block)
+ lowest_block = first_block;
+ if (first_block > highest_block)
+ highest_block = first_block;
+ }
+
+ page_no++;
+ probe_block += blocks_per_page;
+reprobe:
+ continue;
+ }
+ return 0;
+
+bad_bmap:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if (f2fs_readonly(F2FS_I_SB(inode)->sb))
+ return -EROFS;
+
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+
+ ret = check_swap_activate(file, sis->max);
+ if (ret)
+ return ret;
+
+ set_inode_flag(inode, FI_PIN_FILE);
+ f2fs_precache_extents(inode);
+ f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
+ return 0;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+ struct inode *inode = file_inode(file);
+
+ clear_inode_flag(inode, FI_PIN_FILE);
+}
+#else
+static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ return -EOPNOTSUPP;
+}
+
+static void f2fs_swap_deactivate(struct file *file)
+{
+}
+#endif
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,
@@ -2887,6 +3084,8 @@ const struct address_space_operations f2fs_dblock_aops = {
.releasepage = f2fs_release_page,
.direct_IO = f2fs_direct_IO,
.bmap = f2fs_bmap,
+ .swap_activate = f2fs_swap_activate,
+ .swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
.migratepage = f2fs_migrate_page,
#endif
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 99e9a5c37b71..7706049d23bf 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex);
static void update_general_status(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
+ struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
int i;
+ /* these will be changed if online resize is done */
+ si->main_area_segs = le32_to_cpu(raw_super->segment_count_main);
+ si->main_area_sections = le32_to_cpu(raw_super->section_count);
+ si->main_area_zones = si->main_area_sections /
+ le32_to_cpu(raw_super->secs_per_zone);
+
/* validation check of the segment numbers */
si->hit_largest = atomic64_read(&sbi->read_hit_largest);
si->hit_cached = atomic64_read(&sbi->read_hit_cached);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 59bc46017855..85a1528f319f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -218,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
- f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING,
- "Corrupted max_depth of %lu: %u",
- dir->i_ino, max_depth);
+ f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u",
+ dir->i_ino, max_depth);
max_depth = MAX_DIR_HASH_DEPTH;
f2fs_i_depth_write(dir, max_depth);
}
@@ -816,11 +815,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (unlikely(bit_pos > d->max ||
le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted namelen=%d, run fsck to fix.",
- __func__, le16_to_cpu(de->name_len));
+ f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.",
+ __func__, le16_to_cpu(de->name_len));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
goto out;
}
@@ -828,8 +826,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
int save_len = fstr->len;
err = fscrypt_fname_disk_to_usr(d->inode,
- (u32)de->hash_code, 0,
- &de_name, fstr);
+ (u32)le32_to_cpu(de->hash_code),
+ 0, &de_name, fstr);
if (err)
goto out;
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index caf77fe8ac07..e60078460ad1 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -184,10 +184,9 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
next_re = rb_entry(next, struct rb_entry, rb_node);
if (cur_re->ofs + cur_re->len > next_re->ofs) {
- f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, "
- "cur(%u, %u) next(%u, %u)",
- cur_re->ofs, cur_re->len,
- next_re->ofs, next_re->len);
+ f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
+ cur_re->ofs, cur_re->len,
+ next_re->ofs, next_re->len);
return false;
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 06b89a9862ab..17382da7f0bd 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -136,6 +136,9 @@ struct f2fs_mount_info {
int alloc_mode; /* segment allocation policy */
int fsync_mode; /* fsync policy */
bool test_dummy_encryption; /* test dummy encryption */
+ block_t unusable_cap; /* Amount of space allowed to be
+ * unusable when disabling checkpoint
+ */
};
#define F2FS_FEATURE_ENCRYPT 0x0001
@@ -412,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
+#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64)
#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
@@ -476,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode);
#define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
BITS_PER_BYTE + 1))
-#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \
- BITS_PER_BYTE - 1) / BITS_PER_BYTE)
+#define INLINE_DENTRY_BITMAP_SIZE(inode) \
+ DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE)
#define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \
((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \
NR_INLINE_DENTRY(inode) + \
@@ -1052,6 +1056,8 @@ struct f2fs_io_info {
bool retry; /* need to reallocate block address */
enum iostat_type io_type; /* io type */
struct writeback_control *io_wbc; /* writeback control */
+ struct bio **bio; /* bio for ipu */
+ sector_t *last_block; /* last block number in bio */
unsigned char version; /* version of the node */
};
@@ -1111,6 +1117,7 @@ enum {
SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
+ SBI_IS_RESIZEFS, /* resizefs is in process */
};
enum {
@@ -1207,6 +1214,7 @@ struct f2fs_sb_info {
/* for inode management */
struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */
spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */
+ struct mutex flush_lock; /* for flush exclusion */
/* for extent tree cache */
struct radix_tree_root extent_tree_root;/* cache extent cache entries */
@@ -1230,6 +1238,7 @@ struct f2fs_sb_info {
unsigned int segs_per_sec; /* segments per section */
unsigned int secs_per_zone; /* sections per zone */
unsigned int total_sections; /* total section count */
+ struct mutex resize_mutex; /* for resize exclusion */
unsigned int total_node_count; /* total node block count */
unsigned int total_valid_node_count; /* valid node block count */
loff_t max_file_blocks; /* max block index of file */
@@ -1247,6 +1256,7 @@ struct f2fs_sb_info {
block_t unusable_block_count; /* # of blocks saved by last cp */
unsigned int nquota_files; /* # of quota sysfile */
+ struct rw_semaphore quota_sem; /* blocking cp for flags */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
@@ -1488,7 +1498,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping)
static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page)
{
- return F2FS_M_SB(page->mapping);
+ return F2FS_M_SB(page_file_mapping(page));
}
static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
@@ -1766,8 +1776,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, true))
avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
- avail_user_block_count -= sbi->unusable_block_count;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (avail_user_block_count > sbi->unusable_block_count)
+ avail_user_block_count -= sbi->unusable_block_count;
+ else
+ avail_user_block_count = 0;
+ }
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
if (diff > *count)
@@ -1795,7 +1809,20 @@ enospc:
return -ENOSPC;
}
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
+__printf(2, 3)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...);
+
+#define f2fs_err(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__)
+#define f2fs_warn(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__)
+#define f2fs_notice(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__)
+#define f2fs_info(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__)
+#define f2fs_debug(sbi, fmt, ...) \
+ f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__)
+
static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
struct inode *inode,
block_t count)
@@ -1811,11 +1838,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
sbi->current_reserved_blocks + count);
spin_unlock(&sbi->stat_lock);
if (unlikely(inode->i_blocks < sectors)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks,
- (unsigned long long)sectors);
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks,
+ (unsigned long long)sectors);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return;
}
@@ -1967,7 +1993,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
struct inode *inode, bool is_inode)
{
block_t valid_block_count;
- unsigned int valid_node_count;
+ unsigned int valid_node_count, user_block_count;
int err;
if (is_inode) {
@@ -1994,10 +2020,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
if (!__allow_reserved_blocks(sbi, inode, false))
valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+ user_block_count = sbi->user_block_count;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
- valid_block_count += sbi->unusable_block_count;
+ user_block_count -= sbi->unusable_block_count;
- if (unlikely(valid_block_count > sbi->user_block_count)) {
+ if (unlikely(valid_block_count > user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
@@ -2052,10 +2079,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
dquot_free_inode(inode);
} else {
if (unlikely(inode->i_blocks == 0)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks);
+ f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino,
+ (unsigned long long)inode->i_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
return;
}
@@ -2191,6 +2217,9 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi,
static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
+ if (sbi->gc_mode == GC_URGENT)
+ return true;
+
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
get_pages(sbi, F2FS_WB_CP_DATA) ||
@@ -2198,7 +2227,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
get_pages(sbi, F2FS_DIO_WRITE))
return false;
- if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
+ if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
return false;
@@ -2320,57 +2349,23 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
}
/*
- * Inode flags
+ * On-disk inode flags (f2fs_inode::i_flags)
*/
-#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */
-#define F2FS_UNRM_FL 0x00000002 /* Undelete */
-#define F2FS_COMPR_FL 0x00000004 /* Compress file */
#define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */
#define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */
#define F2FS_NODUMP_FL 0x00000040 /* do not dump file */
#define F2FS_NOATIME_FL 0x00000080 /* do not update atime */
-/* Reserved for compression usage... */
-#define F2FS_DIRTY_FL 0x00000100
-#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
-#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */
-#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */
-/* End compression flags --- maybe not all used */
#define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */
-#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */
-#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
-#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
-#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
-#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
-#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
-#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
-#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
-#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */
-#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
-#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
-
-#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */
-#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
-
-/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
-#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \
- F2FS_IMMUTABLE_FL | \
- F2FS_APPEND_FL | \
- F2FS_NODUMP_FL | \
- F2FS_NOATIME_FL | \
- F2FS_PROJINHERIT_FL)
/* Flags that should be inherited by new inodes from their parent. */
-#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\
- F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\
- F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\
- F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\
- F2FS_PROJINHERIT_FL)
+#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
+ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)
/* Flags that are appropriate for regular files (all but dir-specific ones). */
-#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL))
+#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL))
/* Flags that are appropriate for non-directories/regular files. */
#define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL)
@@ -2856,9 +2851,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "invalid blkaddr: %u, type: %d, run fsck to fix.",
- blkaddr, type);
+ f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.",
+ blkaddr, type);
f2fs_bug_on(sbi, 1);
}
}
@@ -2989,8 +2983,6 @@ int f2fs_quota_sync(struct super_block *sb, int type);
void f2fs_quota_off_umount(struct super_block *sb);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
-extern __printf(3, 4)
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...);
int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
@@ -3074,9 +3066,12 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
-int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3169,6 +3164,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
nid_t ino, enum page_type type);
void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi);
int f2fs_submit_page_bio(struct f2fs_io_info *fio);
+int f2fs_merge_page_bio(struct f2fs_io_info *fio);
void f2fs_submit_page_write(struct f2fs_io_info *fio);
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
block_t blk_addr, struct bio *bio);
@@ -3214,6 +3210,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
/*
* recovery.c
@@ -3686,7 +3683,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
if (test_opt(sbi, LFS) && (rw == WRITE) &&
block_unaligned_IO(inode, iocb, iter))
return true;
- if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
+ !(inode->i_flags & S_SWAPFILE))
return true;
return false;
@@ -3712,4 +3710,7 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
return false;
}
+#define EFSBADCRC EBADMSG /* Bad CRC detected */
+#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
+
#endif /* _LINUX_F2FS_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 45b45f37d347..f8d46df8fa9e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -707,11 +707,9 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
stat->btime.tv_nsec = fi->i_crtime.tv_nsec;
}
- flags = fi->i_flags & F2FS_FL_USER_VISIBLE;
+ flags = fi->i_flags;
if (flags & F2FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
- if (flags & F2FS_COMPR_FL)
- stat->attributes |= STATX_ATTR_COMPRESSED;
if (IS_ENCRYPTED(inode))
stat->attributes |= STATX_ATTR_ENCRYPTED;
if (flags & F2FS_IMMUTABLE_FL)
@@ -720,7 +718,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
stat->attributes |= STATX_ATTR_NODUMP;
stat->attributes_mask |= (STATX_ATTR_APPEND |
- STATX_ATTR_COMPRESSED |
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
@@ -1026,7 +1023,7 @@ next_dnode:
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
- return -EFAULT;
+ return -EFSCORRUPTED;
}
if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) {
@@ -1214,7 +1211,7 @@ roll_back:
static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
int ret;
@@ -1467,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
- idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
+ idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
/* avoid gc operation during block exchange */
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -1531,7 +1528,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
if (off_end)
map.m_len++;
- err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ if (f2fs_is_pinned_file(inode))
+ map.m_seg_type = CURSEG_COLD_DATA;
+
+ err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ?
+ F2FS_GET_BLOCK_PRE_DIO :
+ F2FS_GET_BLOCK_PRE_AIO));
if (err) {
pgoff_t last_off;
@@ -1648,44 +1650,22 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
return 0;
}
-static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
{
- struct inode *inode = file_inode(filp);
struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int flags = fi->i_flags;
-
- if (IS_ENCRYPTED(inode))
- flags |= F2FS_ENCRYPT_FL;
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
- flags |= F2FS_INLINE_DATA_FL;
- if (is_inode_flag_set(inode, FI_PIN_FILE))
- flags |= F2FS_NOCOW_FL;
-
- flags &= F2FS_FL_USER_VISIBLE;
-
- return put_user(flags, (int __user *)arg);
-}
-
-static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
-{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int oldflags;
+ u32 oldflags;
/* Is it quota file? Do not allow user to mess with it */
if (IS_NOQUOTA(inode))
return -EPERM;
- flags = f2fs_mask_flags(inode->i_mode, flags);
-
oldflags = fi->i_flags;
- if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
+ if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
if (!capable(CAP_LINUX_IMMUTABLE))
return -EPERM;
- flags = flags & F2FS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE;
- fi->i_flags = flags;
+ fi->i_flags = iflags | (oldflags & ~mask);
if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
@@ -1698,26 +1678,124 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags)
return 0;
}
+/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry
+ * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to
+ * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add
+ * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 fsflag;
+} f2fs_fsflags_map[] = {
+ { F2FS_SYNC_FL, FS_SYNC_FL },
+ { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL },
+ { F2FS_APPEND_FL, FS_APPEND_FL },
+ { F2FS_NODUMP_FL, FS_NODUMP_FL },
+ { F2FS_NOATIME_FL, FS_NOATIME_FL },
+ { F2FS_INDEX_FL, FS_INDEX_FL },
+ { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL },
+ { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL },
+};
+
+#define F2FS_GETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_INDEX_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL | \
+ FS_ENCRYPT_FL | \
+ FS_INLINE_DATA_FL | \
+ FS_NOCOW_FL)
+
+#define F2FS_SETTABLE_FS_FL ( \
+ FS_SYNC_FL | \
+ FS_IMMUTABLE_FL | \
+ FS_APPEND_FL | \
+ FS_NODUMP_FL | \
+ FS_NOATIME_FL | \
+ FS_DIRSYNC_FL | \
+ FS_PROJINHERIT_FL)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */
+static inline u32 f2fs_iflags_to_fsflags(u32 iflags)
+{
+ u32 fsflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (iflags & f2fs_fsflags_map[i].iflag)
+ fsflags |= f2fs_fsflags_map[i].fsflag;
+
+ return fsflags;
+}
+
+/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */
+static inline u32 f2fs_fsflags_to_iflags(u32 fsflags)
+{
+ u32 iflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++)
+ if (fsflags & f2fs_fsflags_map[i].fsflag)
+ iflags |= f2fs_fsflags_map[i].iflag;
+
+ return iflags;
+}
+
+static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
+{
+ struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
+
+ if (IS_ENCRYPTED(inode))
+ fsflags |= FS_ENCRYPT_FL;
+ if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
+ fsflags |= FS_INLINE_DATA_FL;
+ if (is_inode_flag_set(inode, FI_PIN_FILE))
+ fsflags |= FS_NOCOW_FL;
+
+ fsflags &= F2FS_GETTABLE_FS_FL;
+
+ return put_user(fsflags, (int __user *)arg);
+}
+
static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- unsigned int flags;
+ u32 fsflags;
+ u32 iflags;
int ret;
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (get_user(flags, (int __user *)arg))
+ if (get_user(fsflags, (int __user *)arg))
return -EFAULT;
+ if (fsflags & ~F2FS_GETTABLE_FS_FL)
+ return -EOPNOTSUPP;
+ fsflags &= F2FS_SETTABLE_FS_FL;
+
+ iflags = f2fs_fsflags_to_iflags(fsflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
+ return -EOPNOTSUPP;
+
ret = mnt_want_write_file(filp);
if (ret)
return ret;
inode_lock(inode);
- ret = __f2fs_ioc_setflags(inode, flags);
-
+ ret = f2fs_setflags_common(inode, iflags,
+ f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL));
inode_unlock(inode);
mnt_drop_write_file(filp);
return ret;
@@ -1764,9 +1842,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
* f2fs_is_atomic_file.
*/
if (get_dirty_pages(inode))
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Unexpected flush for atomic writes: ino=%lu, npages=%u",
- inode->i_ino, get_dirty_pages(inode));
+ f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u",
+ inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
if (ret) {
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2201,8 +2278,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
return -EROFS;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Skipping Checkpoint. Checkpoints currently disabled.");
+ f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled.");
return -EINVAL;
}
@@ -2291,7 +2367,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
if (!fragmented)
goto out;
- sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi);
+ sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi));
/*
* make sure there are enough free section for LFS allocation, this can
@@ -2587,10 +2663,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num ||
__is_large_section(sbi)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Can't flush %u in %d for segs_per_sec %u != 1",
- range.dev_num, sbi->s_ndevs,
- sbi->segs_per_sec);
+ f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1",
+ range.dev_num, sbi->s_ndevs, sbi->segs_per_sec);
return -EINVAL;
}
@@ -2727,47 +2801,56 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid)
}
#endif
-/* Transfer internal flags to xflags */
-static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags)
-{
- __u32 xflags = 0;
-
- if (iflags & F2FS_SYNC_FL)
- xflags |= FS_XFLAG_SYNC;
- if (iflags & F2FS_IMMUTABLE_FL)
- xflags |= FS_XFLAG_IMMUTABLE;
- if (iflags & F2FS_APPEND_FL)
- xflags |= FS_XFLAG_APPEND;
- if (iflags & F2FS_NODUMP_FL)
- xflags |= FS_XFLAG_NODUMP;
- if (iflags & F2FS_NOATIME_FL)
- xflags |= FS_XFLAG_NOATIME;
- if (iflags & F2FS_PROJINHERIT_FL)
- xflags |= FS_XFLAG_PROJINHERIT;
+/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */
+
+/*
+ * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable
+ * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its
+ * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS.
+ */
+
+static const struct {
+ u32 iflag;
+ u32 xflag;
+} f2fs_xflags_map[] = {
+ { F2FS_SYNC_FL, FS_XFLAG_SYNC },
+ { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE },
+ { F2FS_APPEND_FL, FS_XFLAG_APPEND },
+ { F2FS_NODUMP_FL, FS_XFLAG_NODUMP },
+ { F2FS_NOATIME_FL, FS_XFLAG_NOATIME },
+ { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT },
+};
+
+#define F2FS_SUPPORTED_XFLAGS ( \
+ FS_XFLAG_SYNC | \
+ FS_XFLAG_IMMUTABLE | \
+ FS_XFLAG_APPEND | \
+ FS_XFLAG_NODUMP | \
+ FS_XFLAG_NOATIME | \
+ FS_XFLAG_PROJINHERIT)
+
+/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */
+static inline u32 f2fs_iflags_to_xflags(u32 iflags)
+{
+ u32 xflags = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (iflags & f2fs_xflags_map[i].iflag)
+ xflags |= f2fs_xflags_map[i].xflag;
+
return xflags;
}
-#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \
- FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \
- FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT)
-
-/* Transfer xflags flags to internal */
-static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags)
+/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */
+static inline u32 f2fs_xflags_to_iflags(u32 xflags)
{
- unsigned long iflags = 0;
+ u32 iflags = 0;
+ int i;
- if (xflags & FS_XFLAG_SYNC)
- iflags |= F2FS_SYNC_FL;
- if (xflags & FS_XFLAG_IMMUTABLE)
- iflags |= F2FS_IMMUTABLE_FL;
- if (xflags & FS_XFLAG_APPEND)
- iflags |= F2FS_APPEND_FL;
- if (xflags & FS_XFLAG_NODUMP)
- iflags |= F2FS_NODUMP_FL;
- if (xflags & FS_XFLAG_NOATIME)
- iflags |= F2FS_NOATIME_FL;
- if (xflags & FS_XFLAG_PROJINHERIT)
- iflags |= F2FS_PROJINHERIT_FL;
+ for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++)
+ if (xflags & f2fs_xflags_map[i].xflag)
+ iflags |= f2fs_xflags_map[i].iflag;
return iflags;
}
@@ -2779,8 +2862,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
struct fsxattr fa;
memset(&fa, 0, sizeof(struct fsxattr));
- fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags &
- F2FS_FL_USER_VISIBLE);
+ fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
fa.fsx_projid = (__u32)from_kprojid(&init_user_ns,
@@ -2818,9 +2900,8 @@ static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa)
static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
- struct f2fs_inode_info *fi = F2FS_I(inode);
struct fsxattr fa;
- unsigned int flags;
+ u32 iflags;
int err;
if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa)))
@@ -2830,11 +2911,11 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
if (!inode_owner_or_capable(inode))
return -EACCES;
- if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS)
+ if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS)
return -EOPNOTSUPP;
- flags = f2fs_xflags_to_iflags(fa.fsx_xflags);
- if (f2fs_mask_flags(inode->i_mode, flags) != flags)
+ iflags = f2fs_xflags_to_iflags(fa.fsx_xflags);
+ if (f2fs_mask_flags(inode->i_mode, iflags) != iflags)
return -EOPNOTSUPP;
err = mnt_want_write_file(filp);
@@ -2845,9 +2926,8 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
err = f2fs_ioctl_check_project(inode, &fa);
if (err)
goto out;
- flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) |
- (flags & F2FS_FL_XFLAG_VISIBLE);
- err = __f2fs_ioc_setflags(inode, flags);
+ err = f2fs_setflags_common(inode, iflags,
+ f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS));
if (err)
goto out;
@@ -2869,10 +2949,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
fi->i_gc_failures[GC_FAILURE_PIN] + 1);
if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: Enable GC = ino %lx after %x GC trials",
- __func__, inode->i_ino,
- fi->i_gc_failures[GC_FAILURE_PIN]);
+ f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
+ __func__, inode->i_ino,
+ fi->i_gc_failures[GC_FAILURE_PIN]);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
@@ -2885,9 +2964,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
__u32 pin;
int ret = 0;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
if (get_user(pin, (__u32 __user *)arg))
return -EFAULT;
@@ -2980,6 +3056,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg)
return f2fs_precache_extents(file_inode(filp));
}
+static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
+ __u64 block_count;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (f2fs_readonly(sbi->sb))
+ return -EROFS;
+
+ if (copy_from_user(&block_count, (void __user *)arg,
+ sizeof(block_count)))
+ return -EFAULT;
+
+ ret = f2fs_resize_fs(sbi, block_count);
+
+ return ret;
+}
+
long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
@@ -3036,6 +3133,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
return f2fs_ioc_set_pin_file(filp, arg);
case F2FS_IOC_PRECACHE_EXTENTS:
return f2fs_ioc_precache_extents(filp, arg);
+ case F2FS_IOC_RESIZE_FS:
+ return f2fs_ioc_resize_fs(filp, arg);
default:
return -ENOTTY;
}
@@ -3149,6 +3248,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case F2FS_IOC_GET_PIN_FILE:
case F2FS_IOC_SET_PIN_FILE:
case F2FS_IOC_PRECACHE_EXTENTS:
+ case F2FS_IOC_RESIZE_FS:
break;
default:
return -ENOIOCTLCMD;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 963fb4571fd9..6691f526fa40 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
- unsigned int last_segment = MAIN_SEGS(sbi);
+ unsigned int last_segment;
unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
+ last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
select_policy(sbi, gc_type, type, &p);
@@ -387,7 +388,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
goto next;
/* Don't touch checkpointed data */
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
- get_ckpt_valid_blocks(sbi, segno)))
+ get_ckpt_valid_blocks(sbi, segno) &&
+ p.alloc_mode != SSR))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
@@ -404,7 +406,8 @@ next:
sm->last_victim[p.gc_mode] = last_victim + 1;
else
sm->last_victim[p.gc_mode] = segno + 1;
- sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi);
+ sm->last_victim[p.gc_mode] %=
+ (MAIN_SECS(sbi) * sbi->segs_per_sec);
break;
}
}
@@ -615,9 +618,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
}
if (sum->version != dni->version) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: valid data with mismatched node version.",
- __func__);
+ f2fs_warn(sbi, "%s: valid data with mismatched node version.",
+ __func__);
set_sbi_flag(sbi, SBI_NEED_FSCK);
}
@@ -658,7 +660,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
dn.data_blkaddr = ei.blk + index - ei.fofs;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_page;
}
goto got_it;
@@ -676,7 +678,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
}
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE))) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto put_page;
}
got_it:
@@ -1180,9 +1182,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
sum = page_address(sum_page);
if (type != GET_SUM_TYPE((&sum->footer))) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) "
- "type [%d, %d] in SSA and SIT",
- segno, type, GET_SUM_TYPE((&sum->footer)));
+ f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
+ segno, type, GET_SUM_TYPE((&sum->footer)));
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_stop_checkpoint(sbi, false);
goto skip;
@@ -1360,3 +1361,176 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
}
+
+static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start,
+ unsigned int end)
+{
+ int type;
+ unsigned int segno, next_inuse;
+ int err = 0;
+
+ /* Move out cursegs from the target range */
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ allocate_segment_for_resize(sbi, type, start, end);
+
+ /* do GC to move out valid blocks in the range */
+ for (segno = start; segno <= end; segno += sbi->segs_per_sec) {
+ struct gc_inode_list gc_list = {
+ .ilist = LIST_HEAD_INIT(gc_list.ilist),
+ .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
+ };
+
+ mutex_lock(&sbi->gc_mutex);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+ mutex_unlock(&sbi->gc_mutex);
+ put_gc_inode(&gc_list);
+
+ if (get_valid_blocks(sbi, segno, true))
+ return -EAGAIN;
+ }
+
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err)
+ return err;
+
+ next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start);
+ if (next_inuse <= end) {
+ f2fs_err(sbi, "segno %u should be free but still inuse!",
+ next_inuse);
+ f2fs_bug_on(sbi, 1);
+ }
+ return err;
+}
+
+static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi);
+ int section_count = le32_to_cpu(raw_sb->section_count);
+ int segment_count = le32_to_cpu(raw_sb->segment_count);
+ int segment_count_main = le32_to_cpu(raw_sb->segment_count_main);
+ long long block_count = le64_to_cpu(raw_sb->block_count);
+ int segs = secs * sbi->segs_per_sec;
+
+ raw_sb->section_count = cpu_to_le32(section_count + secs);
+ raw_sb->segment_count = cpu_to_le32(segment_count + segs);
+ raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
+ raw_sb->block_count = cpu_to_le64(block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
+{
+ int segs = secs * sbi->segs_per_sec;
+ long long user_block_count =
+ le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
+
+ SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs;
+ MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs;
+ FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs;
+ FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs;
+ F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count +
+ (long long)segs * sbi->blocks_per_seg);
+}
+
+int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
+{
+ __u64 old_block_count, shrunk_blocks;
+ unsigned int secs;
+ int gc_mode, gc_type;
+ int err = 0;
+ __u32 rem;
+
+ old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count);
+ if (block_count > old_block_count)
+ return -EINVAL;
+
+ /* new fs size should align to section size */
+ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem);
+ if (rem)
+ return -EINVAL;
+
+ if (block_count == old_block_count)
+ return 0;
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+ f2fs_err(sbi, "Should run fsck to repair first.");
+ return -EFSCORRUPTED;
+ }
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ f2fs_err(sbi, "Checkpoint should be enabled.");
+ return -EINVAL;
+ }
+
+ freeze_bdev(sbi->sb->s_bdev);
+
+ shrunk_blocks = old_block_count - block_count;
+ secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi));
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ else
+ sbi->user_block_count -= shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ if (err) {
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+ }
+
+ mutex_lock(&sbi->resize_mutex);
+ set_sbi_flag(sbi, SBI_IS_RESIZEFS);
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+
+ MAIN_SECS(sbi) -= secs;
+
+ for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++)
+ if (SIT_I(sbi)->last_victim[gc_mode] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ SIT_I(sbi)->last_victim[gc_mode] = 0;
+
+ for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++)
+ if (sbi->next_victim_seg[gc_type] >=
+ MAIN_SECS(sbi) * sbi->segs_per_sec)
+ sbi->next_victim_seg[gc_type] = NULL_SEGNO;
+
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+
+ err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec,
+ MAIN_SEGS(sbi) - 1);
+ if (err)
+ goto out;
+
+ update_sb_metadata(sbi, -secs);
+
+ err = f2fs_commit_super(sbi, false);
+ if (err) {
+ update_sb_metadata(sbi, secs);
+ goto out;
+ }
+
+ update_fs_metadata(sbi, -secs);
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ err = f2fs_sync_fs(sbi->sb, 1);
+ if (err) {
+ update_fs_metadata(sbi, secs);
+ update_sb_metadata(sbi, secs);
+ f2fs_commit_super(sbi, false);
+ }
+out:
+ if (err) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
+
+ MAIN_SECS(sbi) += secs;
+ spin_lock(&sbi->stat_lock);
+ sbi->user_block_count += shrunk_blocks;
+ spin_unlock(&sbi->stat_lock);
+ }
+ clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
+ mutex_unlock(&sbi->resize_mutex);
+ thaw_bdev(sbi->sb->s_bdev, sbi->sb);
+ return err;
+}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 404d2462a0fe..3613efca8c00 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -140,11 +140,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (unlikely(dn->data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(dn);
set_sbi_flag(fio.sbi, SBI_NEED_FSCK);
- f2fs_msg(fio.sbi->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dn->inode->i_ino, dn->data_blkaddr);
- return -EINVAL;
+ f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dn->inode->i_ino, dn->data_blkaddr);
+ return -EFSCORRUPTED;
}
f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
@@ -383,11 +381,9 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage,
if (unlikely(dn.data_blkaddr != NEW_ADDR)) {
f2fs_put_dnode(&dn);
set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
- f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING,
- "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, "
- "run fsck to fix.",
- __func__, dir->i_ino, dn.data_blkaddr);
- err = -EINVAL;
+ f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.",
+ __func__, dir->i_ino, dn.data_blkaddr);
+ err = -EFSCORRUPTED;
goto out;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index ccb02226dd2c..a33d7a849b2d 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -74,7 +74,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi,
if (!__is_valid_data_blkaddr(addr))
return 1;
if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE))
- return -EFAULT;
+ return -EFSCORRUPTED;
return 0;
}
@@ -176,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
calculated = f2fs_inode_chksum(sbi, page);
if (provided != calculated)
- f2fs_msg(sbi->sb, KERN_WARNING,
- "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
- page->index, ino_of_node(page), provided, calculated);
+ f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
+ page->index, ino_of_node(page), provided, calculated);
return provided == calculated;
}
@@ -202,50 +201,41 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks);
if (!iblocks) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, "
- "run fsck to fix.",
- __func__, inode->i_ino, iblocks);
+ f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.",
+ __func__, inode->i_ino, iblocks);
return false;
}
if (ino_of_node(node_page) != nid_of_node(node_page)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode footer i_ino=%lx, ino,nid: "
- "[%u, %u] run fsck to fix.",
- __func__, inode->i_ino,
- ino_of_node(node_page), nid_of_node(node_page));
+ f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.",
+ __func__, inode->i_ino,
+ ino_of_node(node_page), nid_of_node(node_page));
return false;
}
if (f2fs_sb_has_flexible_inline_xattr(sbi)
&& !f2fs_has_extra_attr(inode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: corrupted inode ino=%lx, run fsck to fix.",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
+ __func__, inode->i_ino);
return false;
}
if (f2fs_has_extra_attr(inode) &&
!f2fs_sb_has_extra_attr(sbi)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) is with extra_attr, "
- "but extra_attr feature is off",
- __func__, inode->i_ino);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
+ __func__, inode->i_ino);
return false;
}
if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE ||
fi->i_extra_isize % sizeof(__le32)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, "
- "max: %zu",
- __func__, inode->i_ino, fi->i_extra_isize,
- F2FS_TOTAL_EXTRA_ATTR_SIZE);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_extra_isize,
+ F2FS_TOTAL_EXTRA_ATTR_SIZE);
return false;
}
@@ -255,11 +245,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) has corrupted "
- "i_inline_xattr_size: %d, max: %zu",
- __func__, inode->i_ino, fi->i_inline_xattr_size,
- MAX_INLINE_XATTR_SIZE);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ __func__, inode->i_ino, fi->i_inline_xattr_size,
+ MAX_INLINE_XATTR_SIZE);
return false;
}
@@ -272,11 +260,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
!f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1,
DATA_GENERIC_ENHANCE))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx) extent info [%u, %u, %u] "
- "is incorrect, run fsck to fix",
- __func__, inode->i_ino,
- ei->blk, ei->fofs, ei->len);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
+ __func__, inode->i_ino,
+ ei->blk, ei->fofs, ei->len);
return false;
}
}
@@ -284,19 +270,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
if (f2fs_has_inline_data(inode) &&
(!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_data, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: inode (ino=%lx, mode=%u) should not have "
- "inline_dentry, run fsck to fix",
- __func__, inode->i_ino, inode->i_mode);
+ f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix",
+ __func__, inode->i_ino, inode->i_mode);
return false;
}
@@ -343,6 +325,8 @@ static int do_read_inode(struct inode *inode)
le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
+ if (S_ISREG(inode->i_mode))
+ fi->i_flags &= ~F2FS_PROJINHERIT_FL;
fi->flags = 0;
fi->i_advise = ri->i_advise;
fi->i_pino = le32_to_cpu(ri->i_pino);
@@ -374,7 +358,7 @@ static int do_read_inode(struct inode *inode)
if (!sanity_check_inode(inode, node_page)) {
f2fs_put_page(node_page, 1);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check data exist */
@@ -783,8 +767,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "May loss orphan inode, run fsck to fix.");
+ f2fs_warn(sbi, "May loss orphan inode, run fsck to fix.");
goto out;
}
@@ -792,8 +775,7 @@ void f2fs_handle_failed_inode(struct inode *inode)
err = f2fs_acquire_orphan_inode(sbi);
if (err) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Too many orphan inodes, run fsck to fix.");
+ f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix.");
} else {
f2fs_add_orphan_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 0f77f9242751..c5b99042e6f2 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -385,9 +385,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
int err = 0;
if (f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "skip recovering inline_dots inode (ino:%lu, pino:%u) "
- "in readonly mountpoint", dir->i_ino, pino);
+ f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint",
+ dir->i_ino, pino);
return 0;
}
@@ -484,9 +483,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ENCRYPTED(dir) &&
(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!fscrypt_has_permitted_context(dir, inode)) {
- f2fs_msg(inode->i_sb, KERN_WARNING,
- "Inconsistent encryption contexts: %lu/%lu",
- dir->i_ino, inode->i_ino);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu",
+ dir->i_ino, inode->i_ino);
err = -EPERM;
goto out_iput;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 18a038a2a9fa..a18b2a895771 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -34,10 +34,9 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_msg(sbi->sb, KERN_WARNING,
- "%s: out-of-range nid=%x, run fsck to fix.",
- __func__, nid);
- return -EINVAL;
+ f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
+ __func__, nid);
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -1189,10 +1188,8 @@ int f2fs_remove_inode_page(struct inode *inode)
}
if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
- f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
- "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
- inode->i_ino,
- (unsigned long long)inode->i_blocks);
+ f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu",
+ inode->i_ino, (unsigned long long)inode->i_blocks);
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
}
@@ -1291,7 +1288,7 @@ static int read_node_page(struct page *page, int op_flags)
if (PageUptodate(page)) {
if (!f2fs_inode_chksum_verify(sbi, page)) {
ClearPageUptodate(page);
- return -EBADMSG;
+ return -EFSBADCRC;
}
return LOCKED_PAGE;
}
@@ -1375,16 +1372,15 @@ repeat:
}
if (!f2fs_inode_chksum_verify(sbi, page)) {
- err = -EBADMSG;
+ err = -EFSBADCRC;
goto out_err;
}
page_hit:
if(unlikely(nid != nid_of_node(page))) {
- f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
- "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
- nid, nid_of_node(page), ino_of_node(page),
- ofs_of_node(page), cpver_of_node(page),
- next_blkaddr_of_node(page));
+ f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
+ nid, nid_of_node(page), ino_of_node(page),
+ ofs_of_node(page), cpver_of_node(page),
+ next_blkaddr_of_node(page));
err = -EINVAL;
out_err:
ClearPageUptodate(page);
@@ -1752,9 +1748,8 @@ continue_unlock:
break;
}
if (!ret && atomic && !marked) {
- f2fs_msg(sbi->sb, KERN_DEBUG,
- "Retry to write fsync mark: ino=%u, idx=%lx",
- ino, last_page->index);
+ f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
+ ino, last_page->index);
lock_page(last_page);
f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
@@ -2304,8 +2299,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
if (ret) {
up_read(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, !mount);
- f2fs_msg(sbi->sb, KERN_ERR,
- "NAT is corrupt, run fsck to fix it");
+ f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
return ret;
}
}
@@ -2725,7 +2719,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
i = 1;
}
for (; i < NAT_ENTRY_PER_BLOCK; i++) {
- if (nat_blk->entries[i].block_addr != NULL_ADDR)
+ if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
if (valid == 0) {
@@ -2915,7 +2909,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
nm_i->full_nat_bits = nm_i->nat_bits + 8;
nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
- f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
+ f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e04f82b3f4fc..783773e4560d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -188,10 +188,9 @@ out:
name = "<encrypted>";
else
name = raw_inode->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "%s: ino = %x, name = %s, dir = %lx, err = %d",
- __func__, ino_of_node(ipage), name,
- IS_ERR(dir) ? 0 : dir->i_ino, err);
+ f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
+ __func__, ino_of_node(ipage), name,
+ IS_ERR(dir) ? 0 : dir->i_ino, err);
return err;
}
@@ -292,9 +291,8 @@ static int recover_inode(struct inode *inode, struct page *page)
else
name = F2FS_INODE(page)->i_name;
- f2fs_msg(inode->i_sb, KERN_NOTICE,
- "recover_inode: ino = %x, name = %s, inline = %x",
- ino_of_node(page), name, raw->i_inline);
+ f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
+ ino_of_node(page), name, raw->i_inline);
return 0;
}
@@ -371,10 +369,9 @@ next:
/* sanity check in order to detect looped node chain */
if (++loop_cnt >= free_blocks ||
blkaddr == next_blkaddr_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "%s: detect looped node chain, "
- "blkaddr:%u, next:%u",
- __func__, blkaddr, next_blkaddr_of_node(page));
+ f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u",
+ __func__, blkaddr,
+ next_blkaddr_of_node(page));
f2fs_put_page(page, 1);
err = -EINVAL;
break;
@@ -553,11 +550,10 @@ retry_dn:
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
- inode->i_ino, ofs_of_node(dn.node_page),
- ofs_of_node(page));
- err = -EFAULT;
+ f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
+ inode->i_ino, ofs_of_node(dn.node_page),
+ ofs_of_node(page));
+ err = -EFSCORRUPTED;
goto err;
}
@@ -569,13 +565,13 @@ retry_dn:
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
- err = -EFAULT;
+ err = -EFSCORRUPTED;
goto err;
}
@@ -642,11 +638,9 @@ retry_prev:
err:
f2fs_put_dnode(&dn);
out:
- f2fs_msg(sbi->sb, KERN_NOTICE,
- "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
- inode->i_ino,
- file_keep_isize(inode) ? "keep" : "recover",
- recovered, err);
+ f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
+ inode->i_ino, file_keep_isize(inode) ? "keep" : "recover",
+ recovered, err);
return err;
}
@@ -734,8 +728,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
#endif
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "recover fsync data on readonly fs");
+ f2fs_info(sbi, "recover fsync data on readonly fs");
sbi->sb->s_flags &= ~SB_RDONLY;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8dee063c833f..a661ac32e829 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
if (test_opt(sbi, DATA_FLUSH)) {
struct blk_plug plug;
+ mutex_lock(&sbi->flush_lock);
+
blk_start_plug(&plug);
f2fs_sync_dirty_inodes(sbi, FILE_INODE);
blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
@@ -869,11 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
mutex_unlock(&dirty_i->seglist_lock);
}
-int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
- block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
block_t holes[2] = {0, 0}; /* DATA and NODE */
+ block_t unusable;
struct seg_entry *se;
unsigned int segno;
@@ -887,10 +894,20 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
}
mutex_unlock(&dirty_i->seglist_lock);
- if (holes[DATA] > ovp || holes[NODE] > ovp)
+ unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+ if (unusable > ovp_holes)
+ return unusable - ovp_holes;
+ return 0;
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
+{
+ int ovp_hole_segs =
+ (overprovision_segments(sbi) - reserved_segments(sbi));
+ if (unusable > F2FS_OPTION(sbi).unusable_cap)
return -EAGAIN;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
- dirty_segments(sbi) > overprovision_segments(sbi))
+ dirty_segments(sbi) > ovp_hole_segs)
return -EAGAIN;
return 0;
}
@@ -1480,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
+ if (dpolicy->timeout != 0 &&
+ f2fs_time_over(sbi, dpolicy->timeout))
+ break;
+
if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
!is_idle(sbi, DISCARD_TIME)) {
io_interrupted = true;
@@ -1740,8 +1761,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
devi = f2fs_target_device_index(sbi, blkstart);
if (blkstart < FDEV(devi).start_blk ||
blkstart > FDEV(devi).end_blk) {
- f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x",
- blkstart);
+ f2fs_err(sbi, "Invalid block %x", blkstart);
return -EIO;
}
blkstart -= FDEV(devi).start_blk;
@@ -1754,10 +1774,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
if (sector & (bdev_zone_sectors(bdev) - 1) ||
nr_sects != bdev_zone_sectors(bdev)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
- devi, sbi->s_ndevs ? FDEV(devi).path: "",
- blkstart, blklen);
+ f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
+ devi, sbi->s_ndevs ? FDEV(devi).path : "",
+ blkstart, blklen);
return -EIO;
}
trace_f2fs_issue_reset_zone(bdev, blkstart);
@@ -2121,15 +2140,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_set_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when setting bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly set, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks--;
del = 0;
@@ -2150,15 +2168,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
mir_exist = f2fs_test_and_clear_bit(offset,
se->cur_valid_map_mir);
if (unlikely(exist != mir_exist)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
- "when clearing bitmap, blk:%u, old bit:%d",
- blkaddr, exist);
+ f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
+ blkaddr, exist);
f2fs_bug_on(sbi, 1);
}
#endif
if (unlikely(!exist)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Bitmap was wrongly cleared, blk:%u", blkaddr);
+ f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
+ blkaddr);
f2fs_bug_on(sbi, 1);
se->valid_blocks++;
del = 0;
@@ -2640,6 +2657,39 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
stat_inc_seg_type(sbi, curseg);
}
+void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+ unsigned int start, unsigned int end)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned int segno;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ segno = CURSEG_I(sbi, type)->segno;
+ if (segno < start || segno > end)
+ goto unlock;
+
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
+ change_curseg(sbi, type);
+ else
+ new_curseg(sbi, type, true);
+
+ stat_inc_seg_type(sbi, curseg);
+
+ locate_dirty_segment(sbi, segno);
+unlock:
+ up_write(&SIT_I(sbi)->sentry_lock);
+
+ if (segno != curseg->segno)
+ f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
+ type, segno, curseg->segno);
+
+ mutex_unlock(&curseg->curseg_mutex);
+ up_read(&SM_I(sbi)->curseg_lock);
+}
+
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg;
@@ -2772,9 +2822,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
goto out;
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
- f2fs_msg(sbi->sb, KERN_WARNING,
- "Found FS corruption, run fsck to fix.");
- return -EIO;
+ f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
+ return -EFSCORRUPTED;
}
/* start/end segment number in main_area */
@@ -3197,12 +3246,17 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EFAULT;
+ f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
+ __func__, segno);
+ return -EFSCORRUPTED;
}
stat_inc_inplace_blocks(fio->sbi);
- err = f2fs_submit_page_bio(fio);
+ if (fio->bio)
+ err = f2fs_merge_page_bio(fio);
+ else
+ err = f2fs_submit_page_bio(fio);
if (!err) {
update_device_state(fio);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
@@ -3393,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
seg_i = CURSEG_I(sbi, i);
segno = le32_to_cpu(ckpt->cur_data_segno[i]);
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
+ if (blk_off > ENTRIES_IN_SUM) {
+ f2fs_bug_on(sbi, 1);
+ f2fs_put_page(page, 1);
+ return -EFAULT;
+ }
seg_i->next_segno = segno;
reset_curseg(sbi, i, 0);
seg_i->alloc_type = ckpt->alloc_type[i];
@@ -3530,8 +3589,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
/* sanity check for summary blocks */
if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
- sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
+ sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
+ f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
+ nats_in_cursum(nat_j), sits_in_cursum(sit_j));
return -EINVAL;
+ }
return 0;
}
@@ -3762,7 +3824,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
- bool to_journal = true;
+ bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
struct seg_entry *se;
down_write(&sit_i->sentry_lock);
@@ -3781,7 +3843,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
+ !to_journal)
remove_sits_in_journal(sbi);
/*
@@ -4096,11 +4159,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
start = le32_to_cpu(segno_in_journal(journal, i));
if (start >= MAIN_SEGS(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong journal entry on segno %u",
- start);
+ f2fs_err(sbi, "Wrong journal entry on segno %u",
+ start);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
break;
}
@@ -4137,11 +4199,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
up_read(&curseg->journal_rwsem);
if (!err && total_node_blocks != valid_node_count(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "SIT is corrupted node# %u vs %u",
- total_node_blocks, valid_node_count(sbi));
+ f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
+ total_node_blocks, valid_node_count(sbi));
set_sbi_flag(sbi, SBI_NEED_FSCK);
- err = -EINVAL;
+ err = -EFSCORRUPTED;
}
return err;
@@ -4232,6 +4293,39 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi)
return init_victim_secmap(sbi);
}
+static int sanity_check_curseg(struct f2fs_sb_info *sbi)
+{
+ int i;
+
+ /*
+ * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
+ * In LFS curseg, all blkaddr after .next_blkoff should be unused.
+ */
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
+ struct curseg_info *curseg = CURSEG_I(sbi, i);
+ struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
+ unsigned int blkofs = curseg->next_blkoff;
+
+ if (f2fs_test_bit(blkofs, se->cur_valid_map))
+ goto out;
+
+ if (curseg->alloc_type == SSR)
+ continue;
+
+ for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
+ if (!f2fs_test_bit(blkofs, se->cur_valid_map))
+ continue;
+out:
+ f2fs_err(sbi,
+ "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
+ i, curseg->segno, curseg->alloc_type,
+ curseg->next_blkoff, blkofs);
+ return -EFSCORRUPTED;
+ }
+ }
+ return 0;
+}
+
/*
* Update min, max modified time for cost-benefit GC algorithm
*/
@@ -4327,6 +4421,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
+ err = sanity_check_curseg(sbi);
+ if (err)
+ return err;
+
init_min_max_mtime(sbi);
return 0;
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 429007b8036e..b74602813a05 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -109,7 +109,7 @@
#define START_SEGNO(segno) \
(SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK)
#define SIT_BLK_CNT(sbi) \
- ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK)
+ DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK)
#define f2fs_bitmap_size(nr) \
(BITS_TO_LONGS(nr) * sizeof(unsigned long))
@@ -693,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
} while (cur_pos < sbi->blocks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Mismatch valid blocks %d vs. %d",
- GET_SIT_VBLOCKS(raw_sit), valid_blocks);
+ f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
+ GET_SIT_VBLOCKS(raw_sit), valid_blocks);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
/* check segment usage, and check boundary of a given segment number */
if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid blocks %d or segno %u",
- GET_SIT_VBLOCKS(raw_sit), segno);
+ f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
+ GET_SIT_VBLOCKS(raw_sit), segno);
set_sbi_flag(sbi, SBI_NEED_FSCK);
- return -EINVAL;
+ return -EFSCORRUPTED;
}
return 0;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6b959bbb336a..d95a681ef7c9 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -136,7 +136,10 @@ enum {
Opt_alloc,
Opt_fsync,
Opt_test_dummy_encryption,
- Opt_checkpoint,
+ Opt_checkpoint_disable,
+ Opt_checkpoint_disable_cap,
+ Opt_checkpoint_disable_cap_perc,
+ Opt_checkpoint_enable,
Opt_err,
};
@@ -195,45 +198,52 @@ static match_table_t f2fs_tokens = {
{Opt_alloc, "alloc_mode=%s"},
{Opt_fsync, "fsync_mode=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
- {Opt_checkpoint, "checkpoint=%s"},
+ {Opt_checkpoint_disable, "checkpoint=disable"},
+ {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"},
+ {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"},
+ {Opt_checkpoint_enable, "checkpoint=enable"},
{Opt_err, NULL},
};
-void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...)
+void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
+ int level;
va_start(args, fmt);
- vaf.fmt = fmt;
+
+ level = printk_get_level(fmt);
+ vaf.fmt = printk_skip_level(fmt);
vaf.va = &args;
- printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf);
+ printk("%c%cF2FS-fs (%s): %pV\n",
+ KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf);
+
va_end(args);
}
static inline void limit_reserve_root(struct f2fs_sb_info *sbi)
{
- block_t limit = (sbi->user_block_count << 1) / 1000;
+ block_t limit = min((sbi->user_block_count << 1) / 1000,
+ sbi->user_block_count - sbi->reserved_blocks);
/* limit is 0.2% */
if (test_opt(sbi, RESERVE_ROOT) &&
F2FS_OPTION(sbi).root_reserved_blocks > limit) {
F2FS_OPTION(sbi).root_reserved_blocks = limit;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Reduce reserved blocks for root = %u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Reduce reserved blocks for root = %u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
}
if (!test_opt(sbi, RESERVE_ROOT) &&
(!uid_eq(F2FS_OPTION(sbi).s_resuid,
make_kuid(&init_user_ns, F2FS_DEF_RESUID)) ||
!gid_eq(F2FS_OPTION(sbi).s_resgid,
make_kgid(&init_user_ns, F2FS_DEF_RESGID))))
- f2fs_msg(sbi->sb, KERN_INFO,
- "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
- from_kuid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resuid),
- from_kgid_munged(&init_user_ns,
- F2FS_OPTION(sbi).s_resgid));
+ f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root",
+ from_kuid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resuid),
+ from_kgid_munged(&init_user_ns,
+ F2FS_OPTION(sbi).s_resgid));
}
static void init_once(void *foo)
@@ -254,35 +264,29 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
int ret = -EINVAL;
if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR,
- "Cannot change journaled "
- "quota options when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
if (f2fs_sb_has_quota_ino(sbi)) {
- f2fs_msg(sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore qf_name");
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name");
return 0;
}
qname = match_strdup(args);
if (!qname) {
- f2fs_msg(sb, KERN_ERR,
- "Not enough memory for storing quotafile name");
+ f2fs_err(sbi, "Not enough memory for storing quotafile name");
return -ENOMEM;
}
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
ret = 0;
else
- f2fs_msg(sb, KERN_ERR,
- "%s quota file already specified",
+ f2fs_err(sbi, "%s quota file already specified",
QTYPE2NAME(qtype));
goto errout;
}
if (strchr(qname, '/')) {
- f2fs_msg(sb, KERN_ERR,
- "quotafile must be on filesystem root");
+ f2fs_err(sbi, "quotafile must be on filesystem root");
goto errout;
}
F2FS_OPTION(sbi).s_qf_names[qtype] = qname;
@@ -298,8 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) {
- f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options"
- " when quota turned on");
+ f2fs_err(sbi, "Cannot change journaled quota options when quota turned on");
return -EINVAL;
}
kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]);
@@ -315,8 +318,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
* to support legacy quotas in quota files.
*/
if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. "
- "Cannot enable project quota enforcement.");
+ f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement.");
return -1;
}
if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
@@ -336,21 +338,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi)
if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) ||
test_opt(sbi, PRJQUOTA)) {
- f2fs_msg(sbi->sb, KERN_ERR, "old and new quota "
- "format mixing");
+ f2fs_err(sbi, "old and new quota format mixing");
return -1;
}
if (!F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format "
- "not specified");
+ f2fs_err(sbi, "journaled quota format not specified");
return -1;
}
}
if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "QUOTA feature is enabled, so ignore jquota_fmt");
+ f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt");
F2FS_OPTION(sbi).s_jquota_fmt = 0;
}
return 0;
@@ -418,8 +417,7 @@ static int parse_options(struct super_block *sb, char *options)
break;
case Opt_nodiscard:
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_WARNING,
- "discard is required for zoned block devices");
+ f2fs_warn(sbi, "discard is required for zoned block devices");
return -EINVAL;
}
clear_opt(sbi, DISCARD);
@@ -451,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_user_xattr:
- f2fs_msg(sb, KERN_INFO,
- "user_xattr options not supported");
+ f2fs_info(sbi, "user_xattr options not supported");
break;
case Opt_nouser_xattr:
- f2fs_msg(sb, KERN_INFO,
- "nouser_xattr options not supported");
+ f2fs_info(sbi, "nouser_xattr options not supported");
break;
case Opt_inline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "inline_xattr options not supported");
+ f2fs_info(sbi, "inline_xattr options not supported");
break;
case Opt_noinline_xattr:
- f2fs_msg(sb, KERN_INFO,
- "noinline_xattr options not supported");
+ f2fs_info(sbi, "noinline_xattr options not supported");
break;
#endif
#ifdef CONFIG_F2FS_FS_POSIX_ACL
@@ -476,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_acl:
- f2fs_msg(sb, KERN_INFO, "acl options not supported");
+ f2fs_info(sbi, "acl options not supported");
break;
case Opt_noacl:
- f2fs_msg(sb, KERN_INFO, "noacl options not supported");
+ f2fs_info(sbi, "noacl options not supported");
break;
#endif
case Opt_active_logs:
@@ -529,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (test_opt(sbi, RESERVE_ROOT)) {
- f2fs_msg(sb, KERN_INFO,
- "Preserve previous reserve_root=%u",
- F2FS_OPTION(sbi).root_reserved_blocks);
+ f2fs_info(sbi, "Preserve previous reserve_root=%u",
+ F2FS_OPTION(sbi).root_reserved_blocks);
} else {
F2FS_OPTION(sbi).root_reserved_blocks = arg;
set_opt(sbi, RESERVE_ROOT);
@@ -542,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
uid = make_kuid(current_user_ns(), arg);
if (!uid_valid(uid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid uid value %d", arg);
+ f2fs_err(sbi, "Invalid uid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resuid = uid;
@@ -553,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
gid = make_kgid(current_user_ns(), arg);
if (!gid_valid(gid)) {
- f2fs_msg(sb, KERN_ERR,
- "Invalid gid value %d", arg);
+ f2fs_err(sbi, "Invalid gid value %d", arg);
return -EINVAL;
}
F2FS_OPTION(sbi).s_resgid = gid;
@@ -567,9 +558,7 @@ static int parse_options(struct super_block *sb, char *options)
if (strlen(name) == 8 &&
!strncmp(name, "adaptive", 8)) {
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_WARNING,
- "adaptive mode is not allowed with "
- "zoned block device feature");
+ f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature");
kvfree(name);
return -EINVAL;
}
@@ -587,9 +576,8 @@ static int parse_options(struct super_block *sb, char *options)
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
- f2fs_msg(sb, KERN_WARNING,
- "Not support %d, larger than %d",
- 1 << arg, BIO_MAX_PAGES);
+ f2fs_warn(sbi, "Not support %d, larger than %d",
+ 1 << arg, BIO_MAX_PAGES);
return -EINVAL;
}
F2FS_OPTION(sbi).write_io_size_bits = arg;
@@ -610,13 +598,11 @@ static int parse_options(struct super_block *sb, char *options)
break;
#else
case Opt_fault_injection:
- f2fs_msg(sb, KERN_INFO,
- "fault_injection options not supported");
+ f2fs_info(sbi, "fault_injection options not supported");
break;
case Opt_fault_type:
- f2fs_msg(sb, KERN_INFO,
- "fault_type options not supported");
+ f2fs_info(sbi, "fault_type options not supported");
break;
#endif
case Opt_lazytime:
@@ -696,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_jqfmt_vfsv0:
case Opt_jqfmt_vfsv1:
case Opt_noquota:
- f2fs_msg(sb, KERN_INFO,
- "quota operations not supported");
+ f2fs_info(sbi, "quota operations not supported");
break;
#endif
case Opt_whint:
@@ -759,39 +744,44 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_test_dummy_encryption:
#ifdef CONFIG_FS_ENCRYPTION
if (!f2fs_sb_has_encrypt(sbi)) {
- f2fs_msg(sb, KERN_ERR, "Encrypt feature is off");
+ f2fs_err(sbi, "Encrypt feature is off");
return -EINVAL;
}
F2FS_OPTION(sbi).test_dummy_encryption = true;
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mode enabled");
+ f2fs_info(sbi, "Test dummy encryption mode enabled");
#else
- f2fs_msg(sb, KERN_INFO,
- "Test dummy encryption mount option ignored");
+ f2fs_info(sbi, "Test dummy encryption mount option ignored");
#endif
break;
- case Opt_checkpoint:
- name = match_strdup(&args[0]);
- if (!name)
- return -ENOMEM;
-
- if (strlen(name) == 6 &&
- !strncmp(name, "enable", 6)) {
- clear_opt(sbi, DISABLE_CHECKPOINT);
- } else if (strlen(name) == 7 &&
- !strncmp(name, "disable", 7)) {
- set_opt(sbi, DISABLE_CHECKPOINT);
- } else {
- kvfree(name);
+ case Opt_checkpoint_disable_cap_perc:
+ if (args->from && match_int(args, &arg))
return -EINVAL;
- }
- kvfree(name);
+ if (arg < 0 || arg > 100)
+ return -EINVAL;
+ if (arg == 100)
+ F2FS_OPTION(sbi).unusable_cap =
+ sbi->user_block_count;
+ else
+ F2FS_OPTION(sbi).unusable_cap =
+ (sbi->user_block_count / 100) * arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable_cap:
+ if (args->from && match_int(args, &arg))
+ return -EINVAL;
+ F2FS_OPTION(sbi).unusable_cap = arg;
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_disable:
+ set_opt(sbi, DISABLE_CHECKPOINT);
+ break;
+ case Opt_checkpoint_enable:
+ clear_opt(sbi, DISABLE_CHECKPOINT);
break;
default:
- f2fs_msg(sb, KERN_ERR,
- "Unrecognized mount option \"%s\" or missing value",
- p);
+ f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
+ p);
return -EINVAL;
}
}
@@ -800,23 +790,18 @@ static int parse_options(struct super_block *sb, char *options)
return -EINVAL;
#else
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sbi->sb, KERN_INFO,
- "Filesystem with quota feature cannot be mounted RDWR "
- "without CONFIG_QUOTA");
+ f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) {
- f2fs_msg(sb, KERN_ERR,
- "Filesystem with project quota feature cannot be "
- "mounted RDWR without CONFIG_QUOTA");
+ f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA");
return -EINVAL;
}
#endif
if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "Should set mode=lfs with %uKB-sized IO",
- F2FS_IO_SIZE_KB(sbi));
+ f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
+ F2FS_IO_SIZE_KB(sbi));
return -EINVAL;
}
@@ -825,15 +810,11 @@ static int parse_options(struct super_block *sb, char *options)
if (!f2fs_sb_has_extra_attr(sbi) ||
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
- f2fs_msg(sb, KERN_ERR,
- "extra_attr or flexible_inline_xattr "
- "feature is off");
+ f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off");
return -EINVAL;
}
if (!test_opt(sbi, INLINE_XATTR)) {
- f2fs_msg(sb, KERN_ERR,
- "inline_xattr_size option should be "
- "set with inline_xattr option");
+ f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option");
return -EINVAL;
}
@@ -842,16 +823,14 @@ static int parse_options(struct super_block *sb, char *options)
if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
F2FS_OPTION(sbi).inline_xattr_size > max_size) {
- f2fs_msg(sb, KERN_ERR,
- "inline xattr size is out of range: %d ~ %d",
- min_size, max_size);
+ f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d",
+ min_size, max_size);
return -EINVAL;
}
}
if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
- f2fs_msg(sb, KERN_ERR,
- "LFS not compatible with checkpoint=disable\n");
+ f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n");
return -EINVAL;
}
@@ -1313,6 +1292,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, DISCARD))
seq_puts(seq, ",discard");
+ else
+ seq_puts(seq, ",nodiscard");
if (test_opt(sbi, NOHEAP))
seq_puts(seq, ",no_heap");
else
@@ -1409,8 +1390,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
seq_printf(seq, ",alloc_mode=%s", "reuse");
if (test_opt(sbi, DISABLE_CHECKPOINT))
- seq_puts(seq, ",checkpoint=disable");
-
+ seq_printf(seq, ",checkpoint=disable:%u",
+ F2FS_OPTION(sbi).unusable_cap);
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
seq_printf(seq, ",fsync_mode=%s", "posix");
else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1439,6 +1420,7 @@ static void default_options(struct f2fs_sb_info *sbi)
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
clear_opt(sbi, DISABLE_CHECKPOINT);
+ F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
set_opt(sbi, DISCARD);
@@ -1467,10 +1449,10 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
struct cp_control cpc;
int err = 0;
int ret;
+ block_t unusable;
if (s_flags & SB_RDONLY) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "checkpoint=disable on readonly fs");
+ f2fs_err(sbi, "checkpoint=disable on readonly fs");
return -EINVAL;
}
sbi->sb->s_flags |= SB_ACTIVE;
@@ -1494,7 +1476,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto restore_flag;
}
- if (f2fs_disable_cp_again(sbi)) {
+ unusable = f2fs_get_unusable_blocks(sbi);
+ if (f2fs_disable_cp_again(sbi, unusable)) {
err = -EAGAIN;
goto restore_flag;
}
@@ -1507,7 +1490,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
goto out_unlock;
spin_lock(&sbi->stat_lock);
- sbi->unusable_block_count = 0;
+ sbi->unusable_block_count = unusable;
spin_unlock(&sbi->stat_lock);
out_unlock:
@@ -1572,8 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* recover superblocks we couldn't write due to previous RO mount */
if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) {
err = f2fs_commit_super(sbi, false);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover all the superblocks, ret: %d", err);
+ f2fs_info(sbi, "Try to recover all the superblocks, ret: %d",
+ err);
if (!err)
clear_sbi_flag(sbi, SBI_NEED_SB_WRITE);
}
@@ -1614,15 +1597,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "switch extent_cache option is not allowed");
+ f2fs_warn(sbi, "switch extent_cache option is not allowed");
goto restore_opts;
}
if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
err = -EINVAL;
- f2fs_msg(sbi->sb, KERN_WARNING,
- "disabling checkpoint not compatible with read-only");
+ f2fs_warn(sbi, "disabling checkpoint not compatible with read-only");
goto restore_opts;
}
@@ -1692,8 +1673,7 @@ skip:
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
- f2fs_msg(sbi->sb, KERN_WARNING,
- "background gc thread has stopped");
+ f2fs_warn(sbi, "background gc thread has stopped");
} else if (need_stop_gc) {
f2fs_stop_gc_thread(sbi);
}
@@ -1832,8 +1812,7 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
{
if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "quota sysfile may be corrupted, skip loading it");
+ f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it");
return 0;
}
@@ -1849,8 +1828,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
if (f2fs_sb_has_quota_ino(sbi) && rdonly) {
err = f2fs_enable_quotas(sbi->sb);
if (err) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quota_ino: %d", err);
+ f2fs_err(sbi, "Cannot turn on quota_ino: %d", err);
return 0;
}
return 1;
@@ -1863,8 +1841,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly)
enabled = 1;
continue;
}
- f2fs_msg(sbi->sb, KERN_ERR,
- "Cannot turn on quotas: %d on %d", err, i);
+ f2fs_err(sbi, "Cannot turn on quotas: %d on %d",
+ err, i);
}
}
return enabled;
@@ -1885,8 +1863,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
qf_inode = f2fs_iget(sb, qf_inum);
if (IS_ERR(qf_inode)) {
- f2fs_msg(sb, KERN_ERR,
- "Bad quota inode %u:%lu", type, qf_inum);
+ f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum);
return PTR_ERR(qf_inode);
}
@@ -1899,17 +1876,17 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id,
static int f2fs_enable_quotas(struct super_block *sb)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int type, err = 0;
unsigned long qf_inum;
bool quota_mopt[MAXQUOTAS] = {
- test_opt(F2FS_SB(sb), USRQUOTA),
- test_opt(F2FS_SB(sb), GRPQUOTA),
- test_opt(F2FS_SB(sb), PRJQUOTA),
+ test_opt(sbi, USRQUOTA),
+ test_opt(sbi, GRPQUOTA),
+ test_opt(sbi, PRJQUOTA),
};
if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
- f2fs_msg(sb, KERN_ERR,
- "quota file may be corrupted, skip loading it");
+ f2fs_err(sbi, "quota file may be corrupted, skip loading it");
return 0;
}
@@ -1922,10 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "fsck to fix.", type, err);
+ f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.",
+ type, err);
for (type--; type >= 0; type--)
dquot_quota_off(sb, type);
set_sbi_flag(F2FS_SB(sb),
@@ -1944,6 +1919,18 @@ int f2fs_quota_sync(struct super_block *sb, int type)
int cnt;
int ret;
+ /*
+ * do_quotactl
+ * f2fs_quota_sync
+ * down_read(quota_sem)
+ * dquot_writeback_dquots()
+ * f2fs_dquot_commit
+ * block_operation
+ * down_read(quota_sem)
+ */
+ f2fs_lock_op(sbi);
+
+ down_read(&sbi->quota_sem);
ret = dquot_writeback_dquots(sb, type);
if (ret)
goto out;
@@ -1981,6 +1968,8 @@ int f2fs_quota_sync(struct super_block *sb, int type)
out:
if (ret)
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
+ f2fs_unlock_op(sbi);
return ret;
}
@@ -2045,10 +2034,8 @@ void f2fs_quota_off_umount(struct super_block *sb)
if (err) {
int ret = dquot_quota_off(sb, type);
- f2fs_msg(sb, KERN_ERR,
- "Fail to turn off disk quota "
- "(type: %d, err: %d, ret:%d), Please "
- "run fsck to fix it.", type, err, ret);
+ f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.",
+ type, err, ret);
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
}
}
@@ -2074,32 +2061,40 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
static int f2fs_dquot_commit(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_commit(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_acquire(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_acquire(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
-
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_release(struct dquot *dquot)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_release(dquot);
if (ret < 0)
- set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
@@ -2109,22 +2104,27 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
struct f2fs_sb_info *sbi = F2FS_SB(sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_mark_dquot_dirty(dquot);
/* if we are using journalled quota */
if (is_journalled_quota(sbi))
set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+ up_read(&sbi->quota_sem);
return ret;
}
static int f2fs_dquot_commit_info(struct super_block *sb, int type)
{
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
int ret;
+ down_read(&sbi->quota_sem);
ret = dquot_commit_info(sb, type);
if (ret < 0)
- set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+ set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+ up_read(&sbi->quota_sem);
return ret;
}
@@ -2341,55 +2341,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
(segment_count << log_blocks_per_seg);
if (segment0_blkaddr != cp_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
- segment0_blkaddr, cp_blkaddr);
+ f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
return true;
}
if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
sit_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
- cp_blkaddr, sit_blkaddr,
- segment_count_ckpt << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
return true;
}
if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
nat_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
- sit_blkaddr, nat_blkaddr,
- segment_count_sit << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
return true;
}
if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
ssa_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
- nat_blkaddr, ssa_blkaddr,
- segment_count_nat << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
return true;
}
if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
main_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
- ssa_blkaddr, main_blkaddr,
- segment_count_ssa << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
return true;
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
+ main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
int err = 0;
@@ -2406,12 +2400,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_msg(sb, KERN_INFO,
- "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
- segment_count_main << log_blocks_per_seg);
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
+ res, main_blkaddr,
+ segment0_blkaddr +
+ (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
if (err)
return true;
}
@@ -2425,7 +2418,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
- struct super_block *sb = sbi->sb;
unsigned int blocksize;
size_t crc_offset = 0;
__u32 crc = 0;
@@ -2435,48 +2427,42 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
crc_offset = le32_to_cpu(raw_super->checksum_offset);
if (crc_offset !=
offsetof(struct f2fs_super_block, crc)) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid SB checksum offset: %zu",
- crc_offset);
+ f2fs_info(sbi, "Invalid SB checksum offset: %zu",
+ crc_offset);
return 1;
}
crc = le32_to_cpu(raw_super->crc);
if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid SB checksum value: %u", crc);
+ f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
return 1;
}
}
if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
- f2fs_msg(sb, KERN_INFO,
- "Magic Mismatch, valid(0x%x) - read(0x%x)",
- F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
+ f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
+ F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
return 1;
}
/* Currently, support only 4KB page cache size */
if (F2FS_BLKSIZE != PAGE_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid page_cache_size (%lu), supports only 4KB",
- PAGE_SIZE);
+ f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
+ PAGE_SIZE);
return 1;
}
/* Currently, support only 4KB block size */
blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
if (blocksize != F2FS_BLKSIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid blocksize (%u), supports only 4KB",
- blocksize);
+ f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
+ blocksize);
return 1;
}
/* check log blocks per segment */
if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log blocks per segment (%u)",
- le32_to_cpu(raw_super->log_blocks_per_seg));
+ f2fs_info(sbi, "Invalid log blocks per segment (%u)",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
return 1;
}
@@ -2485,17 +2471,16 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
F2FS_MIN_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)",
- le32_to_cpu(raw_super->log_sectorsize));
+ f2fs_info(sbi, "Invalid log sectorsize (%u)",
+ le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
if (le32_to_cpu(raw_super->log_sectors_per_block) +
le32_to_cpu(raw_super->log_sectorsize) !=
F2FS_MAX_LOG_SECTOR_SIZE) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid log sectors per block(%u) log sectorsize(%u)",
- le32_to_cpu(raw_super->log_sectors_per_block),
- le32_to_cpu(raw_super->log_sectorsize));
+ f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
+ le32_to_cpu(raw_super->log_sectors_per_block),
+ le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
@@ -2509,59 +2494,51 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
if (segment_count > F2FS_MAX_SEGMENT ||
segment_count < F2FS_MIN_SEGMENTS) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment count (%u)",
- segment_count);
+ f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
return 1;
}
if (total_sections > segment_count ||
total_sections < F2FS_MIN_SEGMENTS ||
segs_per_sec > segment_count || !segs_per_sec) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid segment/section count (%u, %u x %u)",
- segment_count, total_sections, segs_per_sec);
+ f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
+ segment_count, total_sections, segs_per_sec);
return 1;
}
if ((segment_count / segs_per_sec) < total_sections) {
- f2fs_msg(sb, KERN_INFO,
- "Small segment_count (%u < %u * %u)",
- segment_count, segs_per_sec, total_sections);
+ f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
+ segment_count, segs_per_sec, total_sections);
return 1;
}
if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong segment_count / block_count (%u > %llu)",
- segment_count, le64_to_cpu(raw_super->block_count));
+ f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
+ segment_count, le64_to_cpu(raw_super->block_count));
return 1;
}
if (secs_per_zone > total_sections || !secs_per_zone) {
- f2fs_msg(sb, KERN_INFO,
- "Wrong secs_per_zone / total_sections (%u, %u)",
- secs_per_zone, total_sections);
+ f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
+ secs_per_zone, total_sections);
return 1;
}
if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
(le32_to_cpu(raw_super->extension_count) +
raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) {
- f2fs_msg(sb, KERN_INFO,
- "Corrupted extension count (%u + %u > %u)",
- le32_to_cpu(raw_super->extension_count),
- raw_super->hot_ext_count,
- F2FS_MAX_EXTENSION);
+ f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)",
+ le32_to_cpu(raw_super->extension_count),
+ raw_super->hot_ext_count,
+ F2FS_MAX_EXTENSION);
return 1;
}
if (le32_to_cpu(raw_super->cp_payload) >
(blocks_per_seg - F2FS_CP_PACKS)) {
- f2fs_msg(sb, KERN_INFO,
- "Insane cp_payload (%u > %u)",
- le32_to_cpu(raw_super->cp_payload),
- blocks_per_seg - F2FS_CP_PACKS);
+ f2fs_info(sbi, "Insane cp_payload (%u > %u)",
+ le32_to_cpu(raw_super->cp_payload),
+ blocks_per_seg - F2FS_CP_PACKS);
return 1;
}
@@ -2569,11 +2546,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
if (le32_to_cpu(raw_super->node_ino) != 1 ||
le32_to_cpu(raw_super->meta_ino) != 2 ||
le32_to_cpu(raw_super->root_ino) != 3) {
- f2fs_msg(sb, KERN_INFO,
- "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
- le32_to_cpu(raw_super->node_ino),
- le32_to_cpu(raw_super->meta_ino),
- le32_to_cpu(raw_super->root_ino));
+ f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
return 1;
}
@@ -2617,8 +2593,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong layout: check mkfs.f2fs version");
+ f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
}
@@ -2627,16 +2602,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
if (!user_block_count || user_block_count >=
segment_count_main << log_blocks_per_seg) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong user_block_count: %u", user_block_count);
+ f2fs_err(sbi, "Wrong user_block_count: %u",
+ user_block_count);
return 1;
}
valid_user_blocks = le64_to_cpu(ckpt->valid_block_count);
if (valid_user_blocks > user_block_count) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid_user_blocks: %u, user_block_count: %u",
- valid_user_blocks, user_block_count);
+ f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u",
+ valid_user_blocks, user_block_count);
return 1;
}
@@ -2644,9 +2618,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
avail_node_count = sbi->total_node_count - sbi->nquota_files -
F2FS_RESERVED_NODE_NUM;
if (valid_node_count > avail_node_count) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong valid_node_count: %u, avail_node_count: %u",
- valid_node_count, avail_node_count);
+ f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u",
+ valid_node_count, avail_node_count);
return 1;
}
@@ -2660,10 +2633,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_node_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Node segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_node_segno[i]));
+ f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
}
}
@@ -2675,10 +2647,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_data_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Data segment (%u, %u) has the same "
- "segno: %u", i, j,
- le32_to_cpu(ckpt->cur_data_segno[i]));
+ f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_data_segno[i]));
return 1;
}
}
@@ -2687,10 +2658,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
for (j = i; j < NR_CURSEG_DATA_TYPE; j++) {
if (le32_to_cpu(ckpt->cur_node_segno[i]) ==
le32_to_cpu(ckpt->cur_data_segno[j])) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Data segment (%u) and Data segment (%u)"
- " has the same segno: %u", i, j,
- le32_to_cpu(ckpt->cur_node_segno[i]));
+ f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u",
+ i, j,
+ le32_to_cpu(ckpt->cur_node_segno[i]));
return 1;
}
}
@@ -2701,9 +2671,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 ||
nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong bitmap size: sit: %u, nat:%u",
- sit_bitmap_size, nat_bitmap_size);
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u",
+ sit_bitmap_size, nat_bitmap_size);
return 1;
}
@@ -2712,14 +2681,22 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
NR_CURSEG_TYPE) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Wrong cp_pack_start_sum: %u",
- cp_pack_start_sum);
+ f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
+ cp_pack_start_sum);
+ return 1;
+ }
+
+ if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) &&
+ le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) {
+ f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, "
+ "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, "
+ "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"",
+ le32_to_cpu(ckpt->checksum_offset));
return 1;
}
if (unlikely(f2fs_cp_error(sbi))) {
- f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
+ f2fs_err(sbi, "A bug case: need to run fsck");
return 1;
}
return 0;
@@ -2888,18 +2865,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
for (block = 0; block < 2; block++) {
bh = sb_bread(sb, block);
if (!bh) {
- f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock",
- block + 1);
+ f2fs_err(sbi, "Unable to read %dth superblock",
+ block + 1);
err = -EIO;
continue;
}
/* sanity checking of raw super */
if (sanity_check_raw_super(sbi, bh)) {
- f2fs_msg(sb, KERN_ERR,
- "Can't find valid F2FS filesystem in %dth superblock",
- block + 1);
- err = -EINVAL;
+ f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
+ block + 1);
+ err = -EFSCORRUPTED;
brelse(bh);
continue;
}
@@ -3028,36 +3004,32 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#ifdef CONFIG_BLK_DEV_ZONED
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
!f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Zoned block device feature not enabled\n");
+ f2fs_err(sbi, "Zoned block device feature not enabled\n");
return -EINVAL;
}
if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
if (init_blkz_info(sbi, i)) {
- f2fs_msg(sbi->sb, KERN_ERR,
- "Failed to initialize F2FS blkzone information");
+ f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
+ "Host-aware" : "Host-managed");
continue;
}
#endif
- f2fs_msg(sbi->sb, KERN_INFO,
- "Mount Device [%2d]: %20s, %8u, %8x - %8x",
- i, FDEV(i).path,
- FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk);
- }
- f2fs_msg(sbi->sb, KERN_INFO,
- "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x",
+ i, FDEV(i).path,
+ FDEV(i).total_segments,
+ FDEV(i).start_blk, FDEV(i).end_blk);
+ }
+ f2fs_info(sbi,
+ "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
return 0;
}
@@ -3103,7 +3075,7 @@ try_onemore:
/* Load the checksum driver */
sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(sbi->s_chksum_driver)) {
- f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
+ f2fs_err(sbi, "Cannot load crc32 driver.");
err = PTR_ERR(sbi->s_chksum_driver);
sbi->s_chksum_driver = NULL;
goto free_sbi;
@@ -3111,7 +3083,7 @@ try_onemore:
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
- f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
+ f2fs_err(sbi, "unable to set blocksize");
goto free_sbi;
}
@@ -3135,8 +3107,7 @@ try_onemore:
*/
#ifndef CONFIG_BLK_DEV_ZONED
if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_msg(sb, KERN_ERR,
- "Zoned block device support is not enabled");
+ f2fs_err(sbi, "Zoned block device support is not enabled");
err = -EOPNOTSUPP;
goto free_sb_buf;
}
@@ -3160,10 +3131,7 @@ try_onemore:
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
- if (f2fs_sb_has_quota_ino(sbi))
- sb->s_qcop = &dquot_quotactl_sysfile_ops;
- else
- sb->s_qcop = &f2fs_quotactl_ops;
+ sb->s_qcop = &f2fs_quotactl_ops;
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
if (f2fs_sb_has_quota_ino(sbi)) {
@@ -3192,6 +3160,7 @@ try_onemore:
mutex_init(&sbi->gc_mutex);
mutex_init(&sbi->writepages);
mutex_init(&sbi->cp_mutex);
+ mutex_init(&sbi->resize_mutex);
init_rwsem(&sbi->node_write);
init_rwsem(&sbi->node_change);
@@ -3227,6 +3196,7 @@ try_onemore:
}
init_rwsem(&sbi->cp_rwsem);
+ init_rwsem(&sbi->quota_sem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);
@@ -3246,14 +3216,14 @@ try_onemore:
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
+ f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
goto free_io_dummy;
}
err = f2fs_get_valid_checkpoint(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint");
+ f2fs_err(sbi, "Failed to get valid F2FS checkpoint");
goto free_meta_inode;
}
@@ -3264,10 +3234,13 @@ try_onemore:
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
}
+ if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG))
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+
/* Initialize device list */
err = f2fs_scan_devices(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR, "Failed to find devices");
+ f2fs_err(sbi, "Failed to find devices");
goto free_devices;
}
@@ -3287,6 +3260,7 @@ try_onemore:
INIT_LIST_HEAD(&sbi->inode_list[i]);
spin_lock_init(&sbi->inode_lock[i]);
}
+ mutex_init(&sbi->flush_lock);
f2fs_init_extent_cache_info(sbi);
@@ -3297,14 +3271,14 @@ try_onemore:
/* setup f2fs internal modules */
err = f2fs_build_segment_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS segment manager");
+ f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)",
+ err);
goto free_sm;
}
err = f2fs_build_node_manager(sbi);
if (err) {
- f2fs_msg(sb, KERN_ERR,
- "Failed to initialize F2FS node manager");
+ f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)",
+ err);
goto free_nm;
}
@@ -3329,7 +3303,7 @@ try_onemore:
/* get an inode for node space */
sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi));
if (IS_ERR(sbi->node_inode)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read node inode");
+ f2fs_err(sbi, "Failed to read node inode");
err = PTR_ERR(sbi->node_inode);
goto free_stats;
}
@@ -3337,7 +3311,7 @@ try_onemore:
/* read root inode and dentry */
root = f2fs_iget(sb, F2FS_ROOT_INO(sbi));
if (IS_ERR(root)) {
- f2fs_msg(sb, KERN_ERR, "Failed to read root inode");
+ f2fs_err(sbi, "Failed to read root inode");
err = PTR_ERR(root);
goto free_node_inode;
}
@@ -3363,8 +3337,7 @@ try_onemore:
if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) {
err = f2fs_enable_quotas(sb);
if (err)
- f2fs_msg(sb, KERN_ERR,
- "Cannot turn on quotas: error %d", err);
+ f2fs_err(sbi, "Cannot turn on quotas: error %d", err);
}
#endif
/* if there are nt orphan nodes free them */
@@ -3384,13 +3357,10 @@ try_onemore:
if (f2fs_hw_is_readonly(sbi)) {
if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
err = -EROFS;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data, but "
- "write access unavailable");
+ f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
goto free_meta;
}
- f2fs_msg(sbi->sb, KERN_INFO, "write access "
- "unavailable, skipping recovery");
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
goto reset_checkpoint;
}
@@ -3405,8 +3375,8 @@ try_onemore:
if (err != -ENOMEM)
skip_recovery = true;
need_fsck = true;
- f2fs_msg(sb, KERN_ERR,
- "Cannot recover all fsync data errno=%d", err);
+ f2fs_err(sbi, "Cannot recover all fsync data errno=%d",
+ err);
goto free_meta;
}
} else {
@@ -3414,8 +3384,7 @@ try_onemore:
if (!f2fs_readonly(sb) && err > 0) {
err = -EINVAL;
- f2fs_msg(sb, KERN_ERR,
- "Need to recover fsync data");
+ f2fs_err(sbi, "Need to recover fsync data");
goto free_meta;
}
}
@@ -3446,17 +3415,16 @@ reset_checkpoint:
/* recover broken superblock */
if (recovery) {
err = f2fs_commit_super(sbi, true);
- f2fs_msg(sb, KERN_INFO,
- "Try to recover %dth superblock, ret: %d",
- sbi->valid_super_block ? 1 : 2, err);
+ f2fs_info(sbi, "Try to recover %dth superblock, ret: %d",
+ sbi->valid_super_block ? 1 : 2, err);
}
f2fs_join_shrinker(sbi);
f2fs_tuning_parameters(sbi);
- f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
- cur_cp_version(F2FS_CKPT(sbi)));
+ f2fs_notice(sbi, "Mounted with checkpoint version = %llx",
+ cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 5c85166677d4..3aeacd0aacfd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -68,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a,
(unsigned long long)(dirty_segments(sbi)));
}
+static ssize_t unusable_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ block_t unusable;
+
+ if (test_opt(sbi, DISABLE_CHECKPOINT))
+ unusable = sbi->unusable_block_count;
+ else
+ unusable = f2fs_get_unusable_blocks(sbi);
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)unusable);
+}
+
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -440,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
+F2FS_GENERAL_RO_ATTR(unusable);
#ifdef CONFIG_FS_ENCRYPTION
F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO);
@@ -495,6 +510,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(inject_type),
#endif
ATTR_LIST(dirty_segments),
+ ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(reserved_blocks),
@@ -568,8 +584,7 @@ static int __maybe_unused segment_info_seq_show(struct seq_file *seq,
if ((i % 10) == 0)
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks);
if ((i % 10) == 9 || i == (total_segs - 1))
seq_putc(seq, '\n');
else
@@ -595,8 +610,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq,
struct seg_entry *se = get_seg_entry(sbi, i);
seq_printf(seq, "%-10d", i);
- seq_printf(seq, "%d|%-3u|", se->type,
- get_valid_blocks(sbi, i, false));
+ seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks);
for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++)
seq_printf(seq, " %.2x", se->cur_valid_map[j]);
seq_putc(seq, '\n');
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index e791741d193b..b32c45621679 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -346,7 +346,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name);
if (!*xe) {
- err = -EFAULT;
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ err = -EFSCORRUPTED;
goto out;
}
check:
@@ -622,7 +625,10 @@ static int __f2fs_setxattr(struct inode *inode, int index,
/* find entry with wanted name. */
here = __find_xattr(base_addr, last_base_addr, index, len, name);
if (!here) {
- error = -EFAULT;
+ f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr",
+ inode->i_ino);
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ error = -EFSCORRUPTED;
goto exit;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 8b0c2bfa90c1..52fa1ef8400b 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -136,27 +136,36 @@ static struct {
{FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA},
};
+static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags)
+{
+ int i;
+ u32 fsflags = 0;
+
+ if (S_ISDIR(inode->i_mode))
+ gfsflags &= ~GFS2_DIF_JDATA;
+ else
+ gfsflags &= ~GFS2_DIF_INHERIT_JDATA;
+
+ for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
+ if (gfsflags & fsflag_gfs2flag[i].gfsflag)
+ fsflags |= fsflag_gfs2flag[i].fsflag;
+ return fsflags;
+}
+
static int gfs2_get_flags(struct file *filp, u32 __user *ptr)
{
struct inode *inode = file_inode(filp);
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
- int i, error;
- u32 gfsflags, fsflags = 0;
+ int error;
+ u32 fsflags;
gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
error = gfs2_glock_nq(&gh);
if (error)
goto out_uninit;
- gfsflags = ip->i_diskflags;
- if (S_ISDIR(inode->i_mode))
- gfsflags &= ~GFS2_DIF_JDATA;
- else
- gfsflags &= ~GFS2_DIF_INHERIT_JDATA;
- for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++)
- if (gfsflags & fsflag_gfs2flag[i].gfsflag)
- fsflags |= fsflag_gfs2flag[i].fsflag;
+ fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
if (put_user(fsflags, ptr))
error = -EFAULT;
@@ -200,9 +209,11 @@ void gfs2_set_inode_flags(struct inode *inode)
* @filp: file pointer
* @reqflags: The flags to set
* @mask: Indicates which flags are valid
+ * @fsflags: The FS_* inode flags passed in
*
*/
-static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
+static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
+ const u32 fsflags)
{
struct inode *inode = file_inode(filp);
struct gfs2_inode *ip = GFS2_I(inode);
@@ -210,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
struct buffer_head *bh;
struct gfs2_holder gh;
int error;
- u32 new_flags, flags;
+ u32 new_flags, flags, oldflags;
error = mnt_want_write_file(filp);
if (error)
@@ -220,6 +231,11 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
if (error)
goto out_drop_write;
+ oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags);
+ error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags);
+ if (error)
+ goto out;
+
error = -EACCES;
if (!inode_owner_or_capable(inode))
goto out;
@@ -308,7 +324,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr)
mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA);
}
- return do_gfs2_set_flags(filp, gfsflags, mask);
+ return do_gfs2_set_flags(filp, gfsflags, mask, fsflags);
}
static int gfs2_getlabel(struct file *filp, char __user *label)
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5e6502ef7415..ce15b9496b77 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -57,9 +57,8 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
return 0;
}
-static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
+static inline unsigned int hfsplus_getflags(struct inode *inode)
{
- struct inode *inode = file_inode(file);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
unsigned int flags = 0;
@@ -69,6 +68,13 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
flags |= FS_APPEND_FL;
if (hip->userflags & HFSPLUS_FLG_NODUMP)
flags |= FS_NODUMP_FL;
+ return flags;
+}
+
+static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags)
+{
+ struct inode *inode = file_inode(file);
+ unsigned int flags = hfsplus_getflags(inode);
return put_user(flags, user_flags);
}
@@ -78,6 +84,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
struct inode *inode = file_inode(file);
struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
unsigned int flags, new_fl = 0;
+ unsigned int oldflags = hfsplus_getflags(inode);
int err = 0;
err = mnt_want_write_file(file);
@@ -96,13 +103,9 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
inode_lock(inode);
- if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) ||
- inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
- goto out_unlock_inode;
- }
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto out_unlock_inode;
/* don't silently ignore unsupported ext2 flags */
if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) {
diff --git a/fs/inode.c b/fs/inode.c
index 5f5431ec3d62..0f1e3b563c47 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2190,3 +2190,89 @@ struct timespec64 current_time(struct inode *inode)
return timespec64_trunc(now, inode->i_sb->s_time_gran);
}
EXPORT_SYMBOL(current_time);
+
+/*
+ * Generic function to check FS_IOC_SETFLAGS values and reject any invalid
+ * configurations.
+ *
+ * Note: the caller should be holding i_mutex, or else be sure that they have
+ * exclusive access to the inode structure.
+ */
+int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags,
+ unsigned int flags)
+{
+ /*
+ * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+ * the relevant capability.
+ *
+ * This test looks nicer. Thanks to Pauline Middelink
+ */
+ if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_ioc_setflags_prepare);
+
+/*
+ * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid
+ * configurations.
+ *
+ * Note: the caller should be holding i_mutex, or else be sure that they have
+ * exclusive access to the inode structure.
+ */
+int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa,
+ struct fsxattr *fa)
+{
+ /*
+ * Can't modify an immutable/append-only file unless we have
+ * appropriate permission.
+ */
+ if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
+ (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) &&
+ !capable(CAP_LINUX_IMMUTABLE))
+ return -EPERM;
+
+ /*
+ * Project Quota ID state is only allowed to change from within the init
+ * namespace. Enforce that restriction only if we are trying to change
+ * the quota ID state. Everything else is allowed in user namespaces.
+ */
+ if (current_user_ns() != &init_user_ns) {
+ if (old_fa->fsx_projid != fa->fsx_projid)
+ return -EINVAL;
+ if ((old_fa->fsx_xflags ^ fa->fsx_xflags) &
+ FS_XFLAG_PROJINHERIT)
+ return -EINVAL;
+ }
+
+ /* Check extent size hints. */
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
+ !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) &&
+ !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ return -EINVAL;
+
+ /*
+ * It is only valid to set the DAX flag on regular files and
+ * directories on filesystems.
+ */
+ if ((fa->fsx_xflags & FS_XFLAG_DAX) &&
+ !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+ return -EINVAL;
+
+ /* Extent size hints of zero turn off the flags. */
+ if (fa->fsx_extsize == 0)
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+ if (fa->fsx_cowextsize == 0)
+ fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+
+ return 0;
+}
+EXPORT_SYMBOL(vfs_ioc_fssetxattr_check);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ed4b110a154..3fd884b4e0be 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -231,6 +231,7 @@ struct io_ring_ctx {
struct task_struct *sqo_thread; /* if using sq thread polling */
struct mm_struct *sqo_mm;
wait_queue_head_t sqo_wait;
+ struct completion sqo_thread_started;
struct {
/* CQ ring */
@@ -322,6 +323,7 @@ struct io_kiocb {
struct io_ring_ctx *ctx;
struct list_head list;
+ struct list_head link_list;
unsigned int flags;
refcount_t refs;
#define REQ_F_NOWAIT 1 /* must not punt to workers */
@@ -330,8 +332,10 @@ struct io_kiocb {
#define REQ_F_SEQ_PREV 8 /* sequential with previous */
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */
+#define REQ_F_LINK 64 /* linked sqes */
+#define REQ_F_FAIL_LINK 128 /* fail rest of links */
u64 user_data;
- u32 error; /* iopoll result from callback */
+ u32 result;
u32 sequence;
struct work_struct work;
@@ -403,6 +407,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ctx->flags = p->flags;
init_waitqueue_head(&ctx->cq_wait);
init_completion(&ctx->ctx_done);
+ init_completion(&ctx->sqo_thread_started);
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) {
@@ -584,6 +589,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
req->flags = 0;
/* one is dropped after submission, the other at completion */
refcount_set(&req->refs, 2);
+ req->result = 0;
return req;
out:
io_ring_drop_ctx_refs(ctx, 1);
@@ -599,7 +605,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr)
}
}
-static void io_free_req(struct io_kiocb *req)
+static void __io_free_req(struct io_kiocb *req)
{
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
@@ -607,6 +613,63 @@ static void io_free_req(struct io_kiocb *req)
kmem_cache_free(req_cachep, req);
}
+static void io_req_link_next(struct io_kiocb *req)
+{
+ struct io_kiocb *nxt;
+
+ /*
+ * The list should never be empty when we are called here. But could
+ * potentially happen if the chain is messed up, check to be on the
+ * safe side.
+ */
+ nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
+ if (nxt) {
+ list_del(&nxt->list);
+ if (!list_empty(&req->link_list)) {
+ INIT_LIST_HEAD(&nxt->link_list);
+ list_splice(&req->link_list, &nxt->link_list);
+ nxt->flags |= REQ_F_LINK;
+ }
+
+ INIT_WORK(&nxt->work, io_sq_wq_submit_work);
+ queue_work(req->ctx->sqo_wq, &nxt->work);
+ }
+}
+
+/*
+ * Called if REQ_F_LINK is set, and we fail the head request
+ */
+static void io_fail_links(struct io_kiocb *req)
+{
+ struct io_kiocb *link;
+
+ while (!list_empty(&req->link_list)) {
+ link = list_first_entry(&req->link_list, struct io_kiocb, list);
+ list_del(&link->list);
+
+ io_cqring_add_event(req->ctx, link->user_data, -ECANCELED);
+ __io_free_req(link);
+ }
+}
+
+static void io_free_req(struct io_kiocb *req)
+{
+ /*
+ * If LINK is set, we have dependent requests in this chain. If we
+ * didn't fail this request, queue the first one up, moving any other
+ * dependencies to the next request. In case of failure, fail the rest
+ * of the chain.
+ */
+ if (req->flags & REQ_F_LINK) {
+ if (req->flags & REQ_F_FAIL_LINK)
+ io_fail_links(req);
+ else
+ io_req_link_next(req);
+ }
+
+ __io_free_req(req);
+}
+
static void io_put_req(struct io_kiocb *req)
{
if (refcount_dec_and_test(&req->refs))
@@ -628,16 +691,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
- io_cqring_fill_event(ctx, req->user_data, req->error);
+ io_cqring_fill_event(ctx, req->user_data, req->result);
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
/* If we're not using fixed files, we have to pair the
* completion part with the file put. Use regular
* completions for those, only batch free for fixed
- * file.
+ * file and non-linked commands.
*/
- if (req->flags & REQ_F_FIXED_FILE) {
+ if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+ REQ_F_FIXED_FILE) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
@@ -776,6 +840,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
kiocb_end_write(kiocb);
+ if ((req->flags & REQ_F_LINK) && res != req->result)
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, req->user_data, res);
io_put_req(req);
}
@@ -786,7 +852,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
kiocb_end_write(kiocb);
- req->error = res;
+ if ((req->flags & REQ_F_LINK) && res != req->result)
+ req->flags |= REQ_F_FAIL_LINK;
+ req->result = res;
if (res != -EAGAIN)
req->flags |= REQ_F_IOPOLL_COMPLETED;
}
@@ -929,7 +997,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s,
!kiocb->ki_filp->f_op->iopoll)
return -EOPNOTSUPP;
- req->error = 0;
kiocb->ki_flags |= IOCB_HIPRI;
kiocb->ki_complete = io_complete_rw_iopoll;
} else {
@@ -1001,9 +1068,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
return 0;
}
-static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
- const struct sqe_submit *s, struct iovec **iovec,
- struct iov_iter *iter)
+static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
+ const struct sqe_submit *s, struct iovec **iovec,
+ struct iov_iter *iter)
{
const struct io_uring_sqe *sqe = s->sqe;
void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -1021,7 +1088,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw,
opcode = READ_ONCE(sqe->opcode);
if (opcode == IORING_OP_READ_FIXED ||
opcode == IORING_OP_WRITE_FIXED) {
- int ret = io_import_fixed(ctx, rw, sqe, iter);
+ ssize_t ret = io_import_fixed(ctx, rw, sqe, iter);
*iovec = NULL;
return ret;
}
@@ -1087,7 +1154,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
struct iov_iter iter;
struct file *file;
size_t iov_count;
- int ret;
+ ssize_t read_size, ret;
ret = io_prep_rw(req, s, force_nonblock);
if (ret)
@@ -1100,16 +1167,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s,
return -EINVAL;
ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter);
- if (ret)
+ if (ret < 0)
return ret;
+ read_size = ret;
+ if (req->flags & REQ_F_LINK)
+ req->result = read_size;
+
iov_count = iov_iter_count(&iter);
ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- /* Catch -EAGAIN return for forced non-blocking submission */
ret2 = call_read_iter(file, kiocb, &iter);
+ /*
+ * In case of a short read, punt to async. This can happen
+ * if we have data partially cached. Alternatively we can
+ * return the short read, in which case the application will
+ * need to issue another SQE and wait for it. That SQE will
+ * need async punt anyway, so it's more efficient to do it
+ * here.
+ */
+ if (force_nonblock && ret2 > 0 && ret2 < read_size)
+ ret2 = -EAGAIN;
+ /* Catch -EAGAIN return for forced non-blocking submission */
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
} else {
@@ -1134,7 +1215,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
struct iov_iter iter;
struct file *file;
size_t iov_count;
- int ret;
+ ssize_t ret;
ret = io_prep_rw(req, s, force_nonblock);
if (ret)
@@ -1147,9 +1228,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
return -EINVAL;
ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter);
- if (ret)
+ if (ret < 0)
return ret;
+ if (req->flags & REQ_F_LINK)
+ req->result = ret;
+
iov_count = iov_iter_count(&iter);
ret = -EAGAIN;
@@ -1253,6 +1337,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
end > 0 ? end : LLONG_MAX,
fsync_flags & IORING_FSYNC_DATASYNC);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
@@ -1297,11 +1383,70 @@ static int io_sync_file_range(struct io_kiocb *req,
ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
+ if (ret < 0 && (req->flags & REQ_F_LINK))
+ req->flags |= REQ_F_FAIL_LINK;
io_cqring_add_event(req->ctx, sqe->user_data, ret);
io_put_req(req);
return 0;
}
+#if defined(CONFIG_NET)
+static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock,
+ long (*fn)(struct socket *, struct user_msghdr __user *,
+ unsigned int))
+{
+ struct socket *sock;
+ int ret;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+
+ sock = sock_from_file(req->file, &ret);
+ if (sock) {
+ struct user_msghdr __user *msg;
+ unsigned flags;
+
+ flags = READ_ONCE(sqe->msg_flags);
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ msg = (struct user_msghdr __user *) (unsigned long)
+ READ_ONCE(sqe->addr);
+
+ ret = fn(sock, msg, flags);
+ if (force_nonblock && ret == -EAGAIN)
+ return ret;
+ }
+
+ io_cqring_add_event(req->ctx, sqe->user_data, ret);
+ io_put_req(req);
+ return 0;
+}
+#endif
+
+static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
static void io_poll_remove_one(struct io_kiocb *req)
{
struct io_poll_iocb *poll = &req->poll;
@@ -1549,9 +1694,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
{
int ret, opcode;
+ req->user_data = READ_ONCE(s->sqe->user_data);
+
if (unlikely(s->index >= ctx->sq_entries))
return -EINVAL;
- req->user_data = READ_ONCE(s->sqe->user_data);
opcode = READ_ONCE(s->sqe->opcode);
switch (opcode) {
@@ -1586,6 +1732,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
case IORING_OP_SYNC_FILE_RANGE:
ret = io_sync_file_range(req, s->sqe, force_nonblock);
break;
+ case IORING_OP_SENDMSG:
+ ret = io_sendmsg(req, s->sqe, force_nonblock);
+ break;
+ case IORING_OP_RECVMSG:
+ ret = io_recvmsg(req, s->sqe, force_nonblock);
+ break;
default:
ret = -EINVAL;
break;
@@ -1595,7 +1747,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
return ret;
if (ctx->flags & IORING_SETUP_IOPOLL) {
- if (req->error == -EAGAIN)
+ if (req->result == -EAGAIN)
return -EAGAIN;
/* workqueue context doesn't hold uring_lock, grab it now */
@@ -1819,31 +1971,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s,
return 0;
}
-static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
- struct io_submit_state *state)
+static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ struct sqe_submit *s)
{
- struct io_kiocb *req;
int ret;
- /* enforce forwards compatibility on users */
- if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN)))
- return -EINVAL;
-
- req = io_get_req(ctx, state);
- if (unlikely(!req))
- return -EAGAIN;
-
- ret = io_req_set_file(ctx, s, state, req);
- if (unlikely(ret))
- goto out;
-
- ret = io_req_defer(ctx, req, s->sqe);
- if (ret) {
- if (ret == -EIOCBQUEUED)
- ret = 0;
- return ret;
- }
-
ret = __io_submit_sqe(ctx, req, s, true);
if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
struct io_uring_sqe *sqe_copy;
@@ -1866,24 +1998,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
/*
* Queued up for async execution, worker will release
- * submit reference when the iocb is actually
- * submitted.
+ * submit reference when the iocb is actually submitted.
*/
return 0;
}
}
-out:
/* drop submission reference */
io_put_req(req);
/* and drop final reference, if we failed */
- if (ret)
+ if (ret) {
+ io_cqring_add_event(ctx, req->user_data, ret);
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
io_put_req(req);
+ }
return ret;
}
+#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
+
+static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
+ struct io_submit_state *state, struct io_kiocb **link)
+{
+ struct io_uring_sqe *sqe_copy;
+ struct io_kiocb *req;
+ int ret;
+
+ /* enforce forwards compatibility on users */
+ if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ req = io_get_req(ctx, state);
+ if (unlikely(!req)) {
+ ret = -EAGAIN;
+ goto err;
+ }
+
+ ret = io_req_set_file(ctx, s, state, req);
+ if (unlikely(ret)) {
+err_req:
+ io_free_req(req);
+err:
+ io_cqring_add_event(ctx, s->sqe->user_data, ret);
+ return;
+ }
+
+ ret = io_req_defer(ctx, req, s->sqe);
+ if (ret) {
+ if (ret != -EIOCBQUEUED)
+ goto err_req;
+ return;
+ }
+
+ /*
+ * If we already have a head request, queue this one for async
+ * submittal once the head completes. If we don't have a head but
+ * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be
+ * submitted sync once the chain is complete. If none of those
+ * conditions are true (normal request), then just queue it.
+ */
+ if (*link) {
+ struct io_kiocb *prev = *link;
+
+ sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
+ if (!sqe_copy) {
+ ret = -EAGAIN;
+ goto err_req;
+ }
+
+ s->sqe = sqe_copy;
+ memcpy(&req->submit, s, sizeof(*s));
+ list_add_tail(&req->list, &prev->link_list);
+ } else if (s->sqe->flags & IOSQE_IO_LINK) {
+ req->flags |= REQ_F_LINK;
+
+ memcpy(&req->submit, s, sizeof(*s));
+ INIT_LIST_HEAD(&req->link_list);
+ *link = req;
+ } else {
+ io_queue_sqe(ctx, req, s);
+ }
+}
+
/*
* Batched submission is done, ensure local IO is flushed out.
*/
@@ -1966,7 +2167,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
unsigned int nr, bool has_user, bool mm_fault)
{
struct io_submit_state state, *statep = NULL;
- int ret, i, submitted = 0;
+ struct io_kiocb *link = NULL;
+ bool prev_was_link = false;
+ int i, submitted = 0;
if (nr > IO_PLUG_THRESHOLD) {
io_submit_state_start(&state, ctx, nr);
@@ -1974,22 +2177,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes,
}
for (i = 0; i < nr; i++) {
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+ */
+ if (!prev_was_link && link) {
+ io_queue_sqe(ctx, link, &link->submit);
+ link = NULL;
+ }
+ prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0;
+
if (unlikely(mm_fault)) {
- ret = -EFAULT;
+ io_cqring_add_event(ctx, sqes[i].sqe->user_data,
+ -EFAULT);
} else {
sqes[i].has_user = has_user;
sqes[i].needs_lock = true;
sqes[i].needs_fixed_file = true;
- ret = io_submit_sqe(ctx, &sqes[i], statep);
- }
- if (!ret) {
+ io_submit_sqe(ctx, &sqes[i], statep, &link);
submitted++;
- continue;
}
-
- io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret);
}
+ if (link)
+ io_queue_sqe(ctx, link, &link->submit);
if (statep)
io_submit_state_end(&state);
@@ -2006,6 +2217,8 @@ static int io_sq_thread(void *data)
unsigned inflight;
unsigned long timeout;
+ complete(&ctx->sqo_thread_started);
+
old_fs = get_fs();
set_fs(USER_DS);
@@ -2130,6 +2343,8 @@ static int io_sq_thread(void *data)
static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
{
struct io_submit_state state, *statep = NULL;
+ struct io_kiocb *link = NULL;
+ bool prev_was_link = false;
int i, submit = 0;
if (to_submit > IO_PLUG_THRESHOLD) {
@@ -2139,22 +2354,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit)
for (i = 0; i < to_submit; i++) {
struct sqe_submit s;
- int ret;
if (!io_get_sqring(ctx, &s))
break;
+ /*
+ * If previous wasn't linked and we have a linked command,
+ * that's the end of the chain. Submit the previous link.
+ */
+ if (!prev_was_link && link) {
+ io_queue_sqe(ctx, link, &link->submit);
+ link = NULL;
+ }
+ prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0;
+
s.has_user = true;
s.needs_lock = false;
s.needs_fixed_file = false;
submit++;
-
- ret = io_submit_sqe(ctx, &s, statep);
- if (ret)
- io_cqring_add_event(ctx, s.sqe->user_data, ret);
+ io_submit_sqe(ctx, &s, statep, &link);
}
io_commit_sqring(ctx);
+ if (link)
+ io_queue_sqe(ctx, link, &link->submit);
if (statep)
io_submit_state_end(statep);
@@ -2240,6 +2463,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
static void io_sq_thread_stop(struct io_ring_ctx *ctx)
{
if (ctx->sqo_thread) {
+ wait_for_completion(&ctx->sqo_thread_started);
/*
* The park is a bit of a work-around, without it we get
* warning spews on shutdown with SQPOLL set and affinity
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index ba34dae8bd9f..10ee0ecca1a8 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -98,24 +98,16 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
/* Lock against other parallel changes of flags */
inode_lock(inode);
- oldflags = jfs_inode->mode2;
-
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
- if ((oldflags & JFS_IMMUTABLE_FL) ||
- ((flags ^ oldflags) &
- (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- inode_unlock(inode);
- err = -EPERM;
- goto setflags_out;
- }
+ oldflags = jfs_map_ext2(jfs_inode->mode2 & JFS_FL_USER_VISIBLE,
+ 0);
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err) {
+ inode_unlock(inode);
+ goto setflags_out;
}
flags = flags & JFS_FL_USER_MODIFIABLE;
- flags |= oldflags & ~JFS_FL_USER_MODIFIABLE;
+ flags |= jfs_inode->mode2 & ~JFS_FL_USER_MODIFIABLE;
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 9b96d79eea6c..91b9dac6b2cc 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -148,13 +148,8 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
oldflags = NILFS_I(inode)->i_flags;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by the
- * relevant capability.
- */
- ret = -EPERM;
- if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) &&
- !capable(CAP_LINUX_IMMUTABLE))
+ ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (ret)
goto out;
ret = nilfs_transaction_begin(inode->i_sb, &ti, 0);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 994726ada857..d6f7b299eb23 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -106,16 +106,9 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
flags = flags & mask;
flags |= oldflags & ~mask;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
- status = -EPERM;
- if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
- (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
- if (!capable(CAP_LINUX_IMMUTABLE))
- goto bail_unlock;
- }
+ status = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (status)
+ goto bail_unlock;
handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
if (IS_ERR(handle)) {
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index a35c17017210..679a3c8e4fb3 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -357,11 +357,28 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb,
return ret;
}
+static int orangefs_getflags(struct inode *inode, unsigned long *uval)
+{
+ __u64 val = 0;
+ int ret;
+
+ ret = orangefs_inode_getxattr(inode,
+ "user.pvfs2.meta_hint",
+ &val, sizeof(val));
+ if (ret < 0 && ret != -ENODATA)
+ return ret;
+ else if (ret == -ENODATA)
+ val = 0;
+ *uval = val;
+ return 0;
+}
+
/*
* Perform a miscellaneous operation on a file.
*/
static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
+ struct inode *inode = file_inode(file);
int ret = -ENOTTY;
__u64 val = 0;
unsigned long uval;
@@ -375,20 +392,16 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
* and append flags
*/
if (cmd == FS_IOC_GETFLAGS) {
- val = 0;
- ret = orangefs_inode_getxattr(file_inode(file),
- "user.pvfs2.meta_hint",
- &val, sizeof(val));
- if (ret < 0 && ret != -ENODATA)
+ ret = orangefs_getflags(inode, &uval);
+ if (ret)
return ret;
- else if (ret == -ENODATA)
- val = 0;
- uval = val;
gossip_debug(GOSSIP_FILE_DEBUG,
"orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
(unsigned long long)uval);
return put_user(uval, (int __user *)arg);
} else if (cmd == FS_IOC_SETFLAGS) {
+ unsigned long old_uval;
+
ret = 0;
if (get_user(uval, (int __user *)arg))
return -EFAULT;
@@ -404,11 +417,17 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar
gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
return -EINVAL;
}
+ ret = orangefs_getflags(inode, &old_uval);
+ if (ret)
+ return ret;
+ ret = vfs_ioc_setflags_prepare(inode, old_uval, uval);
+ if (ret)
+ return ret;
val = uval;
gossip_debug(GOSSIP_FILE_DEBUG,
"orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
(unsigned long long)val);
- ret = orangefs_inode_setxattr(file_inode(file),
+ ret = orangefs_inode_setxattr(inode,
"user.pvfs2.meta_hint",
&val, sizeof(val), 0);
}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index acbbaf7a0bb2..45e1a5d11af3 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -74,13 +74,11 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = -EPERM;
goto setflags_out;
}
- if (((flags ^ REISERFS_I(inode)->
- i_attrs) & (REISERFS_IMMUTABLE_FL |
- REISERFS_APPEND_FL))
- && !capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
+ err = vfs_ioc_setflags_prepare(inode,
+ REISERFS_I(inode)->i_attrs,
+ flags);
+ if (err)
goto setflags_out;
- }
if ((flags & REISERFS_NOTAIL_FL) &&
S_ISREG(inode->i_mode)) {
int result;
diff --git a/fs/splice.c b/fs/splice.c
index 14cb602d9a2f..98412721f056 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- long error;
+ ssize_t error;
struct fd f;
int type;
@@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
error = import_iovec(type, uiov, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
- if (!error) {
+ if (error >= 0) {
error = do_vmsplice(f.file, &iter, flags);
kfree(iov);
}
@@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
- long error;
+ ssize_t error;
struct fd f;
int type;
@@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
error = compat_import_iovec(type, iov32, nr_segs,
ARRAY_SIZE(iovstack), &iov, &iter);
- if (!error) {
+ if (error >= 0) {
error = do_vmsplice(f.file, &iter, flags);
kfree(iov);
}
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 4f1a397fda69..034ad14710d1 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -107,18 +107,11 @@ static int setflags(struct inode *inode, int flags)
if (err)
return err;
- /*
- * The IMMUTABLE and APPEND_ONLY flags can only be changed by
- * the relevant capability.
- */
mutex_lock(&ui->ui_mutex);
oldflags = ubifs2ioctl(ui->flags);
- if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
- goto out_unlock;
- }
- }
+ err = vfs_ioc_setflags_prepare(inode, oldflags, flags);
+ if (err)
+ goto out_unlock;
ui->flags = ioctl2ubifs(flags);
ubifs_set_inode_flags(inode);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 91831975363b..b74a47169297 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -62,6 +62,7 @@ xfs-y += xfs_aops.o \
xfs_attr_inactive.o \
xfs_attr_list.o \
xfs_bmap_util.o \
+ xfs_bio_io.o \
xfs_buf.o \
xfs_dir2_readdir.o \
xfs_discard.o \
@@ -80,9 +81,11 @@ xfs-y += xfs_aops.o \
xfs_iops.o \
xfs_inode.o \
xfs_itable.o \
+ xfs_iwalk.o \
xfs_message.o \
xfs_mount.o \
xfs_mru_cache.o \
+ xfs_pwork.o \
xfs_reflink.o \
xfs_stats.o \
xfs_super.o \
@@ -104,12 +107,8 @@ xfs-y += xfs_log.o \
xfs_rmap_item.o \
xfs_log_recover.o \
xfs_trans_ail.o \
- xfs_trans_bmap.o \
xfs_trans_buf.o \
- xfs_trans_extfree.o \
- xfs_trans_inode.o \
- xfs_trans_refcount.o \
- xfs_trans_rmap.o \
+ xfs_trans_inode.o
# optional features
xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index fdd9d6ede25c..16bb9a328678 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -3,12 +3,7 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/mm.h>
#include <linux/sched/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include "kmem.h"
#include "xfs_message.h"
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 8e6b3ba81c03..267655acd426 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
return kmem_zone_alloc(zone, flags | KM_ZERO);
}
+static inline struct page *
+kmem_to_page(void *addr)
+{
+ if (is_vmalloc_addr(addr))
+ return vmalloc_to_page(addr);
+ return virt_to_page(addr);
+}
+
#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index b0c89f54d1bb..5de296b34ab1 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -10,6 +10,7 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
@@ -44,6 +45,12 @@ xfs_get_aghdr_buf(
return bp;
}
+static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
+{
+ return mp->m_sb.sb_logstart > 0 &&
+ id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+}
+
/*
* Generic btree root block init function
*/
@@ -53,40 +60,85 @@ xfs_btroot_init(
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0);
+ xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
}
-/*
- * Alloc btree root block init functions
- */
+/* Finish initializing a free space btree. */
static void
-xfs_bnoroot_init(
+xfs_freesp_init_recs(
struct xfs_mount *mp,
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
struct xfs_alloc_rec *arec;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
+
+ if (is_log_ag(mp, id)) {
+ struct xfs_alloc_rec *nrec;
+ xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp,
+ mp->m_sb.sb_logstart);
+
+ ASSERT(start >= mp->m_ag_prealloc_blocks);
+ if (start != mp->m_ag_prealloc_blocks) {
+ /*
+ * Modify first record to pad stripe align of log
+ */
+ arec->ar_blockcount = cpu_to_be32(start -
+ mp->m_ag_prealloc_blocks);
+ nrec = arec + 1;
+
+ /*
+ * Insert second record at start of internal log
+ * which then gets trimmed.
+ */
+ nrec->ar_startblock = cpu_to_be32(
+ be32_to_cpu(arec->ar_startblock) +
+ be32_to_cpu(arec->ar_blockcount));
+ arec = nrec;
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
+ /*
+ * Change record start to after the internal log
+ */
+ be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
+ }
+
+ /*
+ * Calculate the record block count and check for the case where
+ * the log might have consumed all available space in the AG. If
+ * so, reset the record count to 0 to avoid exposure of an invalid
+ * record start block.
+ */
arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock));
+ if (!arec->ar_blockcount)
+ block->bb_numrecs = 0;
}
+/*
+ * Alloc btree root block init functions
+ */
static void
-xfs_cntroot_init(
+xfs_bnoroot_init(
struct xfs_mount *mp,
struct xfs_buf *bp,
struct aghdr_init_data *id)
{
- struct xfs_alloc_rec *arec;
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
+ xfs_freesp_init_recs(mp, bp, id);
+}
- xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0);
- arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
- arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
- arec->ar_blockcount = cpu_to_be32(id->agsize -
- be32_to_cpu(arec->ar_startblock));
+static void
+xfs_cntroot_init(
+ struct xfs_mount *mp,
+ struct xfs_buf *bp,
+ struct aghdr_init_data *id)
+{
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
+ xfs_freesp_init_recs(mp, bp, id);
}
/*
@@ -101,7 +153,7 @@ xfs_rmaproot_init(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_rmap_rec *rrec;
- xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0);
+ xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
/*
* mark the AG header regions as static metadata The BNO
@@ -149,6 +201,18 @@ xfs_rmaproot_init(
rrec->rm_offset = 0;
be16_add_cpu(&block->bb_numrecs, 1);
}
+
+ /* account for the log space */
+ if (is_log_ag(mp, id)) {
+ rrec = XFS_RMAP_REC_ADDR(block,
+ be16_to_cpu(block->bb_numrecs) + 1);
+ rrec->rm_startblock = cpu_to_be32(
+ XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
+ rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
+ rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
+ rrec->rm_offset = 0;
+ be16_add_cpu(&block->bb_numrecs, 1);
+ }
}
/*
@@ -209,6 +273,14 @@ xfs_agfblock_init(
agf->agf_refcount_level = cpu_to_be32(1);
agf->agf_refcount_blocks = cpu_to_be32(1);
}
+
+ if (is_log_ag(mp, id)) {
+ int64_t logblocks = mp->m_sb.sb_logblocks;
+
+ be32_add_cpu(&agf->agf_freeblks, -logblocks);
+ agf->agf_longest = cpu_to_be32(id->agsize -
+ XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
+ }
}
static void
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e2ba2a3b63b2..87a9747f1d36 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -9,20 +9,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_alloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_rmap_btree.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a9ff3cf82cce..372ad55631fc 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -13,7 +13,6 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
@@ -21,7 +20,6 @@
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
@@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq;
STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
-STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
- xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
/*
* Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
@@ -555,7 +551,7 @@ static xfs_failaddr_t
xfs_agfl_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
int i;
@@ -596,7 +592,7 @@ static void
xfs_agfl_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
/*
@@ -621,7 +617,7 @@ static void
xfs_agfl_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -700,6 +696,107 @@ xfs_alloc_update_counters(
*/
/*
+ * Deal with the case where only small freespaces remain. Either return the
+ * contents of the last freespace record, or allocate space from the freelist if
+ * there is nothing in the tree.
+ */
+STATIC int /* error */
+xfs_alloc_ag_vextent_small(
+ struct xfs_alloc_arg *args, /* allocation argument structure */
+ struct xfs_btree_cur *ccur, /* optional by-size cursor */
+ xfs_agblock_t *fbnop, /* result block number */
+ xfs_extlen_t *flenp, /* result length */
+ int *stat) /* status: 0-freelist, 1-normal/none */
+{
+ int error = 0;
+ xfs_agblock_t fbno = NULLAGBLOCK;
+ xfs_extlen_t flen = 0;
+ int i = 0;
+
+ /*
+ * If a cntbt cursor is provided, try to allocate the largest record in
+ * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
+ * allocation. Make sure to respect minleft even when pulling from the
+ * freelist.
+ */
+ if (ccur)
+ error = xfs_btree_decrement(ccur, 0, &i);
+ if (error)
+ goto error;
+ if (i) {
+ error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
+ if (error)
+ goto error;
+ XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
+ goto out;
+ }
+
+ if (args->minlen != 1 || args->alignment != 1 ||
+ args->resv == XFS_AG_RESV_AGFL ||
+ (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
+ args->minleft))
+ goto out;
+
+ error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
+ if (error)
+ goto error;
+ if (fbno == NULLAGBLOCK)
+ goto out;
+
+ xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
+ xfs_alloc_allow_busy_reuse(args->datatype));
+
+ if (xfs_alloc_is_userdata(args->datatype)) {
+ struct xfs_buf *bp;
+
+ bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
+ if (!bp) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ xfs_trans_binval(args->tp, bp);
+ }
+ *fbnop = args->agbno = fbno;
+ *flenp = args->len = 1;
+ XFS_WANT_CORRUPTED_GOTO(args->mp,
+ fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
+ error);
+ args->wasfromfl = 1;
+ trace_xfs_alloc_small_freelist(args);
+
+ /*
+ * If we're feeding an AGFL block to something that doesn't live in the
+ * free space, we need to clear out the OWN_AG rmap.
+ */
+ error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
+ &XFS_RMAP_OINFO_AG);
+ if (error)
+ goto error;
+
+ *stat = 0;
+ return 0;
+
+out:
+ /*
+ * Can't do the allocation, give up.
+ */
+ if (flen < args->minlen) {
+ args->agbno = NULLAGBLOCK;
+ trace_xfs_alloc_small_notenough(args);
+ flen = 0;
+ }
+ *fbnop = fbno;
+ *flenp = flen;
+ *stat = 1;
+ trace_xfs_alloc_small_done(args);
+ return 0;
+
+error:
+ trace_xfs_alloc_small_error(args);
+ return error;
+}
+
+/*
* Allocate a variable extent in the allocation group agno.
* Type and bno are used to determine where in the allocation group the
* extent will start.
@@ -1583,112 +1680,6 @@ out_nominleft:
}
/*
- * Deal with the case where only small freespaces remain.
- * Either return the contents of the last freespace record,
- * or allocate space from the freelist if there is nothing in the tree.
- */
-STATIC int /* error */
-xfs_alloc_ag_vextent_small(
- xfs_alloc_arg_t *args, /* allocation argument structure */
- xfs_btree_cur_t *ccur, /* by-size cursor */
- xfs_agblock_t *fbnop, /* result block number */
- xfs_extlen_t *flenp, /* result length */
- int *stat) /* status: 0-freelist, 1-normal/none */
-{
- int error;
- xfs_agblock_t fbno;
- xfs_extlen_t flen;
- int i;
-
- if ((error = xfs_btree_decrement(ccur, 0, &i)))
- goto error0;
- if (i) {
- if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- }
- /*
- * Nothing in the btree, try the freelist. Make sure
- * to respect minleft even when pulling from the
- * freelist.
- */
- else if (args->minlen == 1 && args->alignment == 1 &&
- args->resv != XFS_AG_RESV_AGFL &&
- (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
- > args->minleft)) {
- error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
- if (error)
- goto error0;
- if (fbno != NULLAGBLOCK) {
- xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
- xfs_alloc_allow_busy_reuse(args->datatype));
-
- if (xfs_alloc_is_userdata(args->datatype)) {
- xfs_buf_t *bp;
-
- bp = xfs_btree_get_bufs(args->mp, args->tp,
- args->agno, fbno, 0);
- if (!bp) {
- error = -EFSCORRUPTED;
- goto error0;
- }
- xfs_trans_binval(args->tp, bp);
- }
- args->len = 1;
- args->agbno = fbno;
- XFS_WANT_CORRUPTED_GOTO(args->mp,
- args->agbno + args->len <=
- be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
- error0);
- args->wasfromfl = 1;
- trace_xfs_alloc_small_freelist(args);
-
- /*
- * If we're feeding an AGFL block to something that
- * doesn't live in the free space, we need to clear
- * out the OWN_AG rmap.
- */
- error = xfs_rmap_free(args->tp, args->agbp, args->agno,
- fbno, 1, &XFS_RMAP_OINFO_AG);
- if (error)
- goto error0;
-
- *stat = 0;
- return 0;
- }
- /*
- * Nothing in the freelist.
- */
- else
- flen = 0;
- }
- /*
- * Can't allocate from the freelist for some reason.
- */
- else {
- fbno = NULLAGBLOCK;
- flen = 0;
- }
- /*
- * Can't do the allocation, give up.
- */
- if (flen < args->minlen) {
- args->agbno = NULLAGBLOCK;
- trace_xfs_alloc_small_notenough(args);
- flen = 0;
- }
- *fbnop = fbno;
- *flenp = flen;
- *stat = 1;
- trace_xfs_alloc_small_done(args);
- return 0;
-
-error0:
- trace_xfs_alloc_small_error(args);
- return error;
-}
-
-/*
* Free the extent starting at agno/bno for length.
*/
STATIC int
@@ -2095,7 +2086,7 @@ xfs_free_agfl_block(
if (error)
return error;
- bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0);
+ bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
if (!bp)
return -EFSCORRUPTED;
xfs_trans_binval(tp, bp);
@@ -2586,7 +2577,7 @@ static xfs_failaddr_t
xfs_agf_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -2644,7 +2635,7 @@ static void
xfs_agf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2661,7 +2652,7 @@ static void
xfs_agf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -3146,7 +3137,7 @@ xfs_alloc_has_record(
/*
* Walk all the blocks in the AGFL. The @walk_fn can return any negative
- * error code or XFS_BTREE_QUERY_RANGE_ABORT.
+ * error code or XFS_ITER_*.
*/
int
xfs_agfl_walk(
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 9fe949f6055e..2a94543857a1 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -17,7 +17,6 @@
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
@@ -292,7 +291,7 @@ static xfs_failaddr_t
xfs_allocbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c441f41f14e8..d48fcf11cc35 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -9,23 +9,18 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_sf.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_remote.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 3b0dce06e454..ff28ebf3b635 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context {
struct xfs_inode *dp; /* inode */
struct attrlist_cursor_kern *cursor; /* position in list */
char *alist; /* output buffer */
- int seen_enough; /* T/F: seen enough of list? */
+
+ /*
+ * Abort attribute list iteration if non-zero. Can be used to pass
+ * error values to the xfs_attr_list caller.
+ */
+ int seen_enough;
+
ssize_t count; /* num used entries */
int dupcnt; /* count dup hashvals seen */
int bufsize; /* total buffer size */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 1f6e3965ff74..70eb941d02e4 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -10,14 +10,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_attr_sf.h"
@@ -27,7 +25,6 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_dir2.h"
#include "xfs_log.h"
@@ -240,7 +237,7 @@ xfs_attr3_leaf_verify(
struct xfs_buf *bp)
{
struct xfs_attr3_icleaf_hdr ichdr;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *entries;
uint32_t end; /* must be 32bit - see below */
@@ -313,7 +310,7 @@ static void
xfs_attr3_leaf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
@@ -343,7 +340,7 @@ static void
xfs_attr3_leaf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -865,7 +862,7 @@ xfs_attr_shortform_allfit(
struct xfs_attr3_icleaf_hdr leafhdr;
int bytes;
int i;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
@@ -1525,7 +1522,7 @@ xfs_attr_leaf_order(
{
struct xfs_attr3_icleaf_hdr ichdr1;
struct xfs_attr3_icleaf_hdr ichdr2;
- struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
+ struct xfs_mount *mp = leaf1_bp->b_mount;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
@@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash(
{
struct xfs_attr3_icleaf_hdr ichdr;
struct xfs_attr_leaf_entry *entries;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
entries = xfs_attr3_leaf_entryp(bp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 65ff600a8067..4eb30d357045 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -16,18 +16,10 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_attr_remote.h"
-#include "xfs_trans_space.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
-#include "xfs_buf_item.h"
#include "xfs_error.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
@@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify(
bool check_crc,
xfs_failaddr_t *failaddr)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
char *ptr;
int len;
xfs_daddr_t bno;
@@ -175,7 +167,7 @@ static void
xfs_attr3_rmt_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
int blksize = mp->m_attr_geo->blksize;
char *ptr;
@@ -535,7 +527,7 @@ xfs_attr_rmtval_set(
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0);
+ bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
if (!bp)
return -ENOMEM;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 40ce5f3094d1..7071ff98fdbc 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -5,7 +5,6 @@
*/
#include "xfs.h"
#include "xfs_log_format.h"
-#include "xfs_bit.h"
/*
* XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 356ebd1cbe82..baf0b72c0a37 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -13,14 +13,10 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -32,7 +28,6 @@
#include "xfs_trans_space.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
-#include "xfs_symlink.h"
#include "xfs_attr_leaf.h"
#include "xfs_filestream.h"
#include "xfs_rmap.h"
@@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents(
bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
if (!bp) {
bp_release = 1;
- error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents(
XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
xfs_btree_check_lptr(cur, cbno, 1));
#endif
- error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF,
+ error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
@@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree(
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
- abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
+ abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
if (!abp) {
error = -EFSCORRUPTED;
goto out_unreserve_dquot;
@@ -878,7 +873,7 @@ xfs_bmap_local_to_extents(
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
tp->t_firstblock = args.fsbno;
- bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
+ bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
/*
* Initialize the block, copy the data and log the remote buffer.
@@ -1203,7 +1198,7 @@ xfs_iread_extents(
* pointer (leftmost) at each level.
*/
while (level-- > 0) {
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
goto out;
@@ -1276,7 +1271,7 @@ xfs_iread_extents(
*/
if (bno == NULLFSBLOCK)
break;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
if (error)
goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index aff82ed112c9..fbb18ba5d905 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -11,10 +11,8 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
@@ -22,7 +20,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_rmap.h"
/*
@@ -411,7 +408,7 @@ static xfs_failaddr_t
xfs_bmbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_failaddr_t fa;
unsigned int level;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index bbdae2b4559f..f1048efa4268 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -11,16 +11,13 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_alloc.h"
#include "xfs_log.h"
@@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
return;
if (bip)
block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
@@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc(
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_buf_log_item *bip = bp->b_log_item;
- if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+ if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
return;
if (bip)
block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc(
struct xfs_buf *bp)
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (xfs_sb_version_hascrc(&mp->m_sb)) {
if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
@@ -691,14 +688,13 @@ xfs_buf_t * /* buffer for fsbno */
xfs_btree_get_bufl(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock) /* lock flags for get_buf */
+ xfs_fsblock_t fsbno) /* file system block number */
{
xfs_daddr_t d; /* real disk block address */
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}
/*
@@ -710,15 +706,14 @@ xfs_btree_get_bufs(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- uint lock) /* lock flags for get_buf */
+ xfs_agblock_t agbno) /* allocation group block number */
{
xfs_daddr_t d; /* real disk block address */
ASSERT(agno != NULLAGNUMBER);
ASSERT(agbno != NULLAGBLOCK);
d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
+ return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}
/*
@@ -845,7 +840,6 @@ xfs_btree_read_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
- uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops)
@@ -858,7 +852,7 @@ xfs_btree_read_bufl(
return -EFSCORRUPTED;
d = XFS_FSB_TO_DADDR(mp, fsbno);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
- mp->m_bsize, lock, &bp, ops);
+ mp->m_bsize, 0, &bp, ops);
if (error)
return error;
if (bp)
@@ -1185,11 +1179,10 @@ xfs_btree_init_block(
xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags)
+ __u64 owner)
{
xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
- btnum, level, numrecs, owner, flags);
+ btnum, level, numrecs, owner, 0);
}
STATIC void
@@ -1288,7 +1281,6 @@ STATIC int
xfs_btree_get_buf_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr,
- int flags,
struct xfs_btree_block **block,
struct xfs_buf **bpp)
{
@@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block(
xfs_daddr_t d;
int error;
- /* need to sort out how callers deal with failures first */
- ASSERT(!(flags & XBF_TRYLOCK));
-
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, flags);
+ mp->m_bsize, 0);
if (!*bpp)
return -ENOMEM;
@@ -2706,7 +2695,7 @@ __xfs_btree_split(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block as "right". */
- error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
+ error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
if (error)
goto error0;
@@ -2961,7 +2950,7 @@ xfs_btree_new_iroot(
XFS_BTREE_STATS_INC(cur, alloc);
/* Copy the root into a real block. */
- error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
+ error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
if (error)
goto error0;
@@ -3058,7 +3047,7 @@ xfs_btree_new_root(
XFS_BTREE_STATS_INC(cur, alloc);
/* Set up the new block. */
- error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
+ error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
if (error)
goto error0;
@@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify(
struct xfs_buf *bp,
uint64_t owner)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
/* numrecs verification */
@@ -4484,7 +4473,7 @@ xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
@@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify(
struct xfs_buf *bp,
unsigned int max_recs)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_agblock_t agno;
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index e3b3e9dce5da..fa3cd8ab9aba 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -301,8 +301,7 @@ struct xfs_buf * /* buffer for fsbno */
xfs_btree_get_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno, /* file system block number */
- uint lock); /* lock flags for get_buf */
+ xfs_fsblock_t fsbno); /* file system block number */
/*
* Get a buffer for the block, return it with no data read.
@@ -313,8 +312,7 @@ xfs_btree_get_bufs(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno, /* allocation group block number */
- uint lock); /* lock flags for get_buf */
+ xfs_agblock_t agbno); /* allocation group block number */
/*
* Check for the cursor referring to the last block at the given level.
@@ -345,7 +343,6 @@ xfs_btree_read_bufl(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t fsbno, /* file system block number */
- uint lock, /* lock flags for read_buf */
struct xfs_buf **bpp, /* buffer for fsbno */
int refval, /* ref count value for buffer */
const struct xfs_buf_ops *ops);
@@ -383,8 +380,7 @@ xfs_btree_init_block(
xfs_btnum_t btnum,
__u16 level,
__u16 numrecs,
- __u64 owner,
- unsigned int flags);
+ __u64 owner);
void
xfs_btree_init_block_int(
@@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
/* return codes */
-#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
-#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */
+#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
+#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */
typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
union xfs_btree_rec *rec, void *priv);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e2737e2ac2ae..d1c77fd0815d 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -12,20 +12,14 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
#include "xfs_bmap.h"
-#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
@@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify(
struct xfs_buf *bp,
struct xfs_da3_blkinfo *hdr3)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_da_blkinfo *hdr = &hdr3->hdr;
if (!xfs_verify_magic16(bp, hdr->magic))
@@ -148,7 +142,7 @@ static xfs_failaddr_t
xfs_da3_node_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_da_intnode *hdr = bp->b_addr;
struct xfs_da3_icnode_hdr ichdr;
const struct xfs_dir_ops *ops;
@@ -186,7 +180,7 @@ static void
xfs_da3_node_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index b39053dcb643..b1ae572496b6 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -11,11 +11,8 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
/*
* Shortform directory ops
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1c6bf2105939..eb2be2a6a25a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_trans.h"
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 156ce95c9c45..67840723edbb 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -5,20 +5,16 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index b7d6d78f4ce2..a6fb0cc2085e 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -6,22 +6,19 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
/*
@@ -50,7 +47,7 @@ static xfs_failaddr_t
xfs_dir3_block_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_verify_magic(bp, hdr3->magic))
@@ -71,7 +68,7 @@ static void
xfs_dir3_block_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -88,7 +85,7 @@ static void
xfs_dir3_block_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index b7b9ce002cb9..2c79be4c3153 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -6,19 +6,16 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
static xfs_failaddr_t xfs_dir2_data_freefind_verify(
@@ -50,14 +47,13 @@ __xfs_dir3_data_check(
int i; /* leaf index */
int lastfree; /* last entry was unused */
xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */
- xfs_mount_t *mp; /* filesystem mount point */
+ struct xfs_mount *mp = bp->b_mount;
char *p; /* current data position */
int stale; /* count of stale leaves */
struct xfs_name name;
const struct xfs_dir_ops *ops;
struct xfs_da_geometry *geo;
- mp = bp->b_target->bt_mount;
geo = mp->m_dir_geo;
/*
@@ -249,7 +245,7 @@ static xfs_failaddr_t
xfs_dir3_data_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
if (!xfs_verify_magic(bp, hdr3->magic))
@@ -298,7 +294,7 @@ static void
xfs_dir3_data_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -315,7 +311,7 @@ static void
xfs_dir3_data_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 9c2a0a13ed61..a53e4585a2f3 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -6,12 +6,11 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
@@ -20,8 +19,6 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
-#include "xfs_log.h"
/*
* Local function declarations.
@@ -144,7 +141,7 @@ static xfs_failaddr_t
xfs_dir3_leaf_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir2_leaf *leaf = bp->b_addr;
xfs_failaddr_t fa;
@@ -159,7 +156,7 @@ static void
xfs_dir3_leaf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -176,7 +173,7 @@ static void
xfs_dir3_leaf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 16731d2d684b..afcc6642690a 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -6,12 +6,11 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_dir2.h"
@@ -20,7 +19,6 @@
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_log.h"
/*
@@ -84,7 +82,7 @@ static xfs_failaddr_t
xfs_dir3_free_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir2_free_hdr *hdr = bp->b_addr;
if (!xfs_verify_magic(bp, hdr->magic))
@@ -110,7 +108,7 @@ static void
xfs_dir3_free_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -127,7 +125,7 @@ static void
xfs_dir3_free_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 585dfdb7b6b6..033589257f54 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -5,16 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 88fa11071f9f..e8bd688a4073 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,8 +16,6 @@
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
-#include "xfs_trace.h"
int
xfs_calc_dquots_per_chunk(
@@ -224,7 +222,7 @@ static xfs_failaddr_t
xfs_dquot_buf_verify_struct(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
return xfs_dquot_buf_verify(mp, bp, false);
}
@@ -233,7 +231,7 @@ static void
xfs_dquot_buf_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp, false))
return;
@@ -250,7 +248,7 @@ static void
xfs_dquot_buf_readahead_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
xfs_dquot_buf_verify(mp, bp, true) != NULL) {
@@ -268,7 +266,7 @@ static void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_dquot_buf_verify(mp, bp, false);
}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9bb3c48843ec..c968b60cee15 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1)
#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
-#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log
+#define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log)
#define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log
#define XFS_INO_BITS(mp) \
XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index e7382c780ed7..52d03a3a02a4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -97,7 +97,7 @@ struct getbmapx {
* For use by backup and restore programs to set the XFS on-disk inode
* fields di_dmevmask and di_dmstate. These must be set to exactly and
* only values previously obtained via xfs_bulkstat! (Specifically the
- * xfs_bstat_t fields bs_dmevmask and bs_dmstate.)
+ * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
*/
#ifndef HAVE_FSDMIDATA
struct fsdmidata {
@@ -328,7 +328,7 @@ typedef struct xfs_bstime {
__s32 tv_nsec; /* and nanoseconds */
} xfs_bstime_t;
-typedef struct xfs_bstat {
+struct xfs_bstat {
__u64 bs_ino; /* inode number */
__u16 bs_mode; /* type and mode */
__u16 bs_nlink; /* number of links */
@@ -356,7 +356,53 @@ typedef struct xfs_bstat {
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
-} xfs_bstat_t;
+};
+
+/* New bulkstat structure that reports v5 features and fixes padding issues */
+struct xfs_bulkstat {
+ uint64_t bs_ino; /* inode number */
+ uint64_t bs_size; /* file size */
+
+ uint64_t bs_blocks; /* number of blocks */
+ uint64_t bs_xflags; /* extended flags */
+
+ uint64_t bs_atime; /* access time, seconds */
+ uint64_t bs_mtime; /* modify time, seconds */
+
+ uint64_t bs_ctime; /* inode change time, seconds */
+ uint64_t bs_btime; /* creation time, seconds */
+
+ uint32_t bs_gen; /* generation count */
+ uint32_t bs_uid; /* user id */
+ uint32_t bs_gid; /* group id */
+ uint32_t bs_projectid; /* project id */
+
+ uint32_t bs_atime_nsec; /* access time, nanoseconds */
+ uint32_t bs_mtime_nsec; /* modify time, nanoseconds */
+ uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */
+ uint32_t bs_btime_nsec; /* creation time, nanoseconds */
+
+ uint32_t bs_blksize; /* block size */
+ uint32_t bs_rdev; /* device value */
+ uint32_t bs_cowextsize_blks; /* cow extent size hint, blocks */
+ uint32_t bs_extsize_blks; /* extent size hint, blocks */
+
+ uint32_t bs_nlink; /* number of links */
+ uint32_t bs_extents; /* number of extents */
+ uint32_t bs_aextents; /* attribute number of extents */
+ uint16_t bs_version; /* structure version */
+ uint16_t bs_forkoff; /* inode fork offset in bytes */
+
+ uint16_t bs_sick; /* sick inode metadata */
+ uint16_t bs_checked; /* checked inode metadata */
+ uint16_t bs_mode; /* type and mode */
+ uint16_t bs_pad2; /* zeroed */
+
+ uint64_t bs_pad[7]; /* zeroed */
+};
+
+#define XFS_BULKSTAT_VERSION_V1 (1)
+#define XFS_BULKSTAT_VERSION_V5 (5)
/* bs_sick flags */
#define XFS_BS_SICK_INODE (1 << 0) /* inode core */
@@ -374,7 +420,7 @@ typedef struct xfs_bstat {
* to retain compatibility with "old" filesystems).
*/
static inline uint32_t
-bstat_get_projid(struct xfs_bstat *bs)
+bstat_get_projid(const struct xfs_bstat *bs)
{
return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
}
@@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs)
/*
* The user-level BulkStat Request interface structure.
*/
-typedef struct xfs_fsop_bulkreq {
+struct xfs_fsop_bulkreq {
__u64 __user *lastip; /* last inode # pointer */
__s32 icount; /* count of entries in buffer */
void __user *ubuffer;/* user buffer for inode desc. */
__s32 __user *ocount; /* output count pointer */
-} xfs_fsop_bulkreq_t;
-
+};
/*
* Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
*/
-typedef struct xfs_inogrp {
+struct xfs_inogrp {
__u64 xi_startino; /* starting inode number */
__s32 xi_alloccount; /* # bits set in allocmask */
__u64 xi_allocmask; /* mask of allocated inodes */
-} xfs_inogrp_t;
+};
+/* New inumbers structure that reports v5 features and fixes padding issues */
+struct xfs_inumbers {
+ uint64_t xi_startino; /* starting inode number */
+ uint64_t xi_allocmask; /* mask of allocated inodes */
+ uint8_t xi_alloccount; /* # bits set in allocmask */
+ uint8_t xi_version; /* version */
+ uint8_t xi_padding[6]; /* zero */
+};
+
+#define XFS_INUMBERS_VERSION_V1 (1)
+#define XFS_INUMBERS_VERSION_V5 (5)
+
+/* Header for bulk inode requests. */
+struct xfs_bulk_ireq {
+ uint64_t ino; /* I/O: start with this inode */
+ uint32_t flags; /* I/O: operation flags */
+ uint32_t icount; /* I: count of entries in buffer */
+ uint32_t ocount; /* O: count of entries filled out */
+ uint32_t agno; /* I: see comment for IREQ_AGNO */
+ uint64_t reserved[5]; /* must be zero */
+};
+
+/*
+ * Only return results from the specified @agno. If @ino is zero, start
+ * with the first inode of @agno.
+ */
+#define XFS_BULK_IREQ_AGNO (1 << 0)
+
+/*
+ * Return bulkstat information for a single inode, where @ino value is a
+ * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_*
+ * values below. Not compatible with XFS_BULK_IREQ_AGNO.
+ */
+#define XFS_BULK_IREQ_SPECIAL (1 << 1)
+
+#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
+ XFS_BULK_IREQ_SPECIAL)
+
+/* Operate on the root directory inode. */
+#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
+
+/*
+ * ioctl structures for v5 bulkstat and inumbers requests
+ */
+struct xfs_bulkstat_req {
+ struct xfs_bulk_ireq hdr;
+ struct xfs_bulkstat bulkstat[];
+};
+#define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \
+ (nr) * sizeof(struct xfs_bulkstat))
+
+struct xfs_inumbers_req {
+ struct xfs_bulk_ireq hdr;
+ struct xfs_inumbers inumbers[];
+};
+#define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \
+ (nr) * sizeof(struct xfs_inumbers))
/*
* Error injection.
@@ -529,7 +631,7 @@ typedef struct xfs_swapext
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
- xfs_bstat_t sx_stat; /* stat of target b4 copy */
+ struct xfs_bstat sx_stat; /* stat of target b4 copy */
} xfs_swapext_t;
/*
@@ -701,6 +803,8 @@ struct xfs_scrub_metadata {
#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t)
#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
+#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
+#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 49ddfeac19f2..272005ac8c88 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
-void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs);
+void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index fe9898875097..04377ab75863 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -12,17 +12,14 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_icreate_item.h"
@@ -31,20 +28,6 @@
#include "xfs_log.h"
#include "xfs_rmap.h"
-
-/*
- * Allocation group level functions.
- */
-int
-xfs_ialloc_cluster_alignment(
- struct xfs_mount *mp)
-{
- if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
- return mp->m_sb.sb_inoalignmt;
- return 1;
-}
-
/*
* Lookup a record by ino in the btree given by cur.
*/
@@ -299,7 +282,7 @@ xfs_ialloc_inode_init(
* sizes, manipulate the inodes in buffers which are multiples of the
* blocks size.
*/
- nbufs = length / mp->m_blocks_per_cluster;
+ nbufs = length / M_IGEO(mp)->blocks_per_cluster;
/*
* Figure out what version number to use in the inodes we create. If
@@ -343,9 +326,10 @@ xfs_ialloc_inode_init(
* Get the block.
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno +
- (j * mp->m_blocks_per_cluster));
+ (j * M_IGEO(mp)->blocks_per_cluster));
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize * mp->m_blocks_per_cluster,
+ mp->m_bsize *
+ M_IGEO(mp)->blocks_per_cluster,
XBF_UNMAPPED);
if (!fbuf)
return -ENOMEM;
@@ -353,7 +337,7 @@ xfs_ialloc_inode_init(
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
- for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+ for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
int ioffset = i << mp->m_sb.sb_inodelog;
uint isize = xfs_dinode_size(version);
@@ -616,24 +600,26 @@ error:
* Allocate new inodes in the allocation group specified by agbp.
* Return 0 for success, else error code.
*/
-STATIC int /* error code or 0 */
+STATIC int
xfs_ialloc_ag_alloc(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_buf_t *agbp, /* alloc group buffer */
- int *alloc)
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ int *alloc)
{
- xfs_agi_t *agi; /* allocation group header */
- xfs_alloc_arg_t args; /* allocation argument structure */
- xfs_agnumber_t agno;
- int error;
- xfs_agino_t newino; /* new first inode's number */
- xfs_agino_t newlen; /* new number of inodes */
- int isaligned = 0; /* inode allocation at stripe unit */
- /* boundary */
- uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */
+ struct xfs_agi *agi;
+ struct xfs_alloc_arg args;
+ xfs_agnumber_t agno;
+ int error;
+ xfs_agino_t newino; /* new first inode's number */
+ xfs_agino_t newlen; /* new number of inodes */
+ int isaligned = 0; /* inode allocation at stripe */
+ /* unit boundary */
+ /* init. to full chunk */
+ uint16_t allocmask = (uint16_t) -1;
struct xfs_inobt_rec_incore rec;
- struct xfs_perag *pag;
- int do_sparse = 0;
+ struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
+ int do_sparse = 0;
memset(&args, 0, sizeof(args));
args.tp = tp;
@@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc(
#ifdef DEBUG
/* randomly do sparse inode allocations */
if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
- args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks)
+ igeo->ialloc_min_blks < igeo->ialloc_blks)
do_sparse = prandom_u32() & 1;
#endif
@@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc(
* Locking will ensure that we don't have two callers in here
* at one time.
*/
- newlen = args.mp->m_ialloc_inos;
- if (args.mp->m_maxicount &&
+ newlen = igeo->ialloc_inos;
+ if (igeo->maxicount &&
percpu_counter_read_positive(&args.mp->m_icount) + newlen >
- args.mp->m_maxicount)
+ igeo->maxicount)
return -ENOSPC;
- args.minlen = args.maxlen = args.mp->m_ialloc_blks;
+ args.minlen = args.maxlen = igeo->ialloc_blks;
/*
* First try to allocate inodes contiguous with the last-allocated
* chunk of inodes. If the filesystem is striped, this will fill
@@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc(
newino = be32_to_cpu(agi->agi_newino);
agno = be32_to_cpu(agi->agi_seqno);
args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
- args.mp->m_ialloc_blks;
+ igeo->ialloc_blks;
if (do_sparse)
goto sparse_alloc;
if (likely(newino != NULLAGINO &&
@@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc(
* but not to use them in the actual exact allocation.
*/
args.alignment = 1;
- args.minalignslop = args.mp->m_cluster_align - 1;
+ args.minalignslop = igeo->cluster_align - 1;
/* Allow space for the inode btree to split. */
- args.minleft = args.mp->m_in_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc(
* pieces, so don't need alignment anyway.
*/
isaligned = 0;
- if (args.mp->m_sinoalign) {
+ if (igeo->ialloc_align) {
ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
args.alignment = args.mp->m_dalign;
isaligned = 1;
} else
- args.alignment = args.mp->m_cluster_align;
+ args.alignment = igeo->cluster_align;
/*
* Need to figure out where to allocate the inode blocks.
* Ideally they should be spaced out through the a.g.
@@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc(
/*
* Allow space for the inode btree to split.
*/
- args.minleft = args.mp->m_in_maxlevels - 1;
+ args.minleft = igeo->inobt_maxlevels - 1;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc(
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.agbno = be32_to_cpu(agi->agi_root);
args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
- args.alignment = args.mp->m_cluster_align;
+ args.alignment = igeo->cluster_align;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc(
* the sparse allocation length is smaller than a full chunk.
*/
if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
- args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks &&
+ igeo->ialloc_min_blks < igeo->ialloc_blks &&
args.fsbno == NULLFSBLOCK) {
sparse_alloc:
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -773,7 +759,7 @@ sparse_alloc:
args.alignment = args.mp->m_sb.sb_spino_align;
args.prod = 1;
- args.minlen = args.mp->m_ialloc_min_blks;
+ args.minlen = igeo->ialloc_min_blks;
args.maxlen = args.minlen;
/*
@@ -789,7 +775,7 @@ sparse_alloc:
args.min_agbno = args.mp->m_sb.sb_inoalignmt;
args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
args.mp->m_sb.sb_inoalignmt) -
- args.mp->m_ialloc_blks;
+ igeo->ialloc_blks;
error = xfs_alloc_vextent(&args);
if (error)
@@ -1006,7 +992,7 @@ xfs_ialloc_ag_select(
* space needed for alignment of inode chunks when checking the
* longest contiguous free space in the AG - this prevents us
* from getting ENOSPC because we have free space larger than
- * m_ialloc_blks but alignment constraints prevent us from using
+ * ialloc_blks but alignment constraints prevent us from using
* it.
*
* If we can't find an AG with space for full alignment slack to
@@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select(
* if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition.
*/
- ineed = mp->m_ialloc_min_blks;
+ ineed = M_IGEO(mp)->ialloc_min_blks;
if (flags && ineed > 1)
- ineed += mp->m_cluster_align;
+ ineed += M_IGEO(mp)->cluster_align;
longest = pag->pagf_longest;
if (!longest)
longest = pag->pagf_flcount > 0;
@@ -1703,6 +1689,7 @@ xfs_dialloc(
int noroom = 0;
xfs_agnumber_t start_agno;
struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
int okalloc = 1;
if (*IO_agbp) {
@@ -1733,9 +1720,9 @@ xfs_dialloc(
* Read rough value of mp->m_icount by percpu_counter_read_positive,
* which will sacrifice the preciseness but improve the performance.
*/
- if (mp->m_maxicount &&
- percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
- > mp->m_maxicount) {
+ if (igeo->maxicount &&
+ percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
+ > igeo->maxicount) {
noroom = 1;
okalloc = 0;
}
@@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
- mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES);
+ M_IGEO(mp)->ialloc_blks,
+ &XFS_RMAP_OINFO_INODES);
return;
}
@@ -2261,7 +2249,7 @@ xfs_imap_lookup(
/* check that the returned record contains the required inode */
if (rec.ir_startino > agino ||
- rec.ir_startino + mp->m_ialloc_inos <= agino)
+ rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
return -EINVAL;
/* for untrusted inodes check it is allocated first */
@@ -2352,7 +2340,7 @@ xfs_imap(
* If the inode cluster size is the same as the blocksize or
* smaller we get to the buffer by simple arithmetics.
*/
- if (mp->m_blocks_per_cluster == 1) {
+ if (M_IGEO(mp)->blocks_per_cluster == 1) {
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
@@ -2368,8 +2356,8 @@ xfs_imap(
* find the location. Otherwise we have to do a btree
* lookup to find the location.
*/
- if (mp->m_inoalign_mask) {
- offset_agbno = agbno & mp->m_inoalign_mask;
+ if (M_IGEO(mp)->inoalign_mask) {
+ offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
chunk_agbno = agbno - offset_agbno;
} else {
error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
@@ -2381,13 +2369,13 @@ xfs_imap(
out_map:
ASSERT(agbno >= chunk_agbno);
cluster_agbno = chunk_agbno +
- ((offset_agbno / mp->m_blocks_per_cluster) *
- mp->m_blocks_per_cluster);
+ ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
+ M_IGEO(mp)->blocks_per_cluster);
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
- imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+ imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
/*
@@ -2409,20 +2397,6 @@ out_map:
}
/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
- xfs_mount_t *mp) /* file system mount structure */
-{
- uint inodes;
-
- inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
- mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
- inodes);
-}
-
-/*
* Log specified fields for the ag hdr (inode section). The growth of the agi
* structure over time requires that we interpret the buffer as two logical
* regions delineated by the end of the unlinked list. This is due to the size
@@ -2493,7 +2467,7 @@ static xfs_failaddr_t
xfs_agi_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
int i;
@@ -2545,7 +2519,7 @@ static void
xfs_agi_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2562,7 +2536,7 @@ static void
xfs_agi_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
@@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes(
*freecount = ci.freecount;
return 0;
}
+
+/*
+ * Initialize inode-related geometry information.
+ *
+ * Compute the inode btree min and max levels and set maxicount.
+ *
+ * Set the inode cluster size. This may still be overridden by the file
+ * system block size if it is larger than the chosen cluster size.
+ *
+ * For v5 filesystems, scale the cluster size with the inode size to keep a
+ * constant ratio of inode per cluster buffer, but only if mkfs has set the
+ * inode alignment value appropriately for larger cluster sizes.
+ *
+ * Then compute the inode cluster alignment information.
+ */
+void
+xfs_ialloc_setup_geometry(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ uint64_t icount;
+ uint inodes;
+
+ /* Compute inode btree geometry. */
+ igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
+ igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
+ igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
+ igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
+ igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
+
+ igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
+ sbp->sb_inopblock);
+ igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
+
+ if (sbp->sb_spino_align)
+ igeo->ialloc_min_blks = sbp->sb_spino_align;
+ else
+ igeo->ialloc_min_blks = igeo->ialloc_blks;
+
+ /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
+ inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
+ igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
+ inodes);
+
+ /* Set the maximum inode count for this filesystem. */
+ if (sbp->sb_imax_pct) {
+ /*
+ * Make sure the maximum inode count is a multiple
+ * of the units we allocate inodes in.
+ */
+ icount = sbp->sb_dblocks * sbp->sb_imax_pct;
+ do_div(icount, 100);
+ do_div(icount, igeo->ialloc_blks);
+ igeo->maxicount = XFS_FSB_TO_INO(mp,
+ icount * igeo->ialloc_blks);
+ } else {
+ igeo->maxicount = 0;
+ }
+
+ /*
+ * Compute the desired size of an inode cluster buffer size, which
+ * starts at 8K and (on v5 filesystems) scales up with larger inode
+ * sizes.
+ *
+ * Preserve the desired inode cluster size because the sparse inodes
+ * feature uses that desired size (not the actual size) to compute the
+ * sparse inode alignment. The mount code validates this value, so we
+ * cannot change the behavior.
+ */
+ igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ int new_size = igeo->inode_cluster_size_raw;
+
+ new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
+ if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
+ igeo->inode_cluster_size_raw = new_size;
+ }
+
+ /* Calculate inode cluster ratios. */
+ if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
+ igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
+ igeo->inode_cluster_size_raw);
+ else
+ igeo->blocks_per_cluster = 1;
+ igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
+ igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
+
+ /* Calculate inode cluster alignment. */
+ if (xfs_sb_version_hasalign(&mp->m_sb) &&
+ mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
+ igeo->cluster_align = mp->m_sb.sb_inoalignmt;
+ else
+ igeo->cluster_align = 1;
+ igeo->inoalign_mask = igeo->cluster_align - 1;
+ igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
+
+ /*
+ * If we are using stripe alignment, check whether
+ * the stripe unit is a multiple of the inode alignment
+ */
+ if (mp->m_dalign && igeo->inoalign_mask &&
+ !(mp->m_dalign & igeo->inoalign_mask))
+ igeo->ialloc_align = mp->m_dalign;
+ else
+ igeo->ialloc_align = 0;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index e936b7cc9389..323592d563d5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -23,16 +23,6 @@ struct xfs_icluster {
* sparse chunks */
};
-/* Calculate and return the number of filesystem blocks per inode cluster */
-static inline int
-xfs_icluster_size_fsb(
- struct xfs_mount *mp)
-{
- if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
- return 1;
- return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
-}
-
/*
* Make an inode pointer out of the buffer/offset.
*/
@@ -96,13 +86,6 @@ xfs_imap(
uint flags); /* flags for inode btree lookup */
/*
- * Compute and fill in value of m_in_maxlevels.
- */
-void
-xfs_ialloc_compute_maxlevels(
- struct xfs_mount *mp); /* file system mount structure */
-
-/*
* Log specified fields for the ag hdr (inode section)
*/
void
@@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int *stat);
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
+void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index bc2dfacd2f4a..b82992f795aa 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -11,14 +11,12 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_rmap.h"
@@ -28,7 +26,7 @@ xfs_inobt_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
- return cur->bc_mp->m_inobt_mnr[level != 0];
+ return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0];
}
STATIC struct xfs_btree_cur *
@@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
- return cur->bc_mp->m_inobt_mxr[level != 0];
+ return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0];
}
STATIC void
@@ -255,7 +253,7 @@ static xfs_failaddr_t
xfs_inobt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
xfs_failaddr_t fa;
unsigned int level;
@@ -281,10 +279,11 @@ xfs_inobt_verify(
/* level verification */
level = be16_to_cpu(block->bb_level);
- if (level >= mp->m_in_maxlevels)
+ if (level >= M_IGEO(mp)->inobt_maxlevels)
return __this_address;
- return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
+ return xfs_btree_sblock_verify(bp,
+ M_IGEO(mp)->inobt_mxr[level != 0]);
}
static void
@@ -546,7 +545,7 @@ xfs_inobt_max_size(
xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno);
/* Bail out if we're uninitialized, which can happen in mkfs. */
- if (mp->m_inobt_mxr[0] == 0)
+ if (M_IGEO(mp)->inobt_mxr[0] == 0)
return 0;
/*
@@ -558,11 +557,41 @@ xfs_inobt_max_size(
XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
agblocks -= mp->m_sb.sb_logblocks;
- return xfs_btree_calc_size(mp->m_inobt_mnr,
+ return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
(uint64_t)agblocks * mp->m_sb.sb_inopblock /
XFS_INODES_PER_CHUNK);
}
+/* Read AGI and create inobt cursor. */
+int
+xfs_inobt_cur(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ xfs_btnum_t which,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp)
+{
+ struct xfs_btree_cur *cur;
+ int error;
+
+ ASSERT(*agi_bpp == NULL);
+ ASSERT(*curpp == NULL);
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp);
+ if (error)
+ return error;
+
+ cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
+ if (!cur) {
+ xfs_trans_brelse(tp, *agi_bpp);
+ *agi_bpp = NULL;
+ return -ENOMEM;
+ }
+ *curpp = cur;
+ return 0;
+}
+
static int
xfs_inobt_count_blocks(
struct xfs_mount *mp,
@@ -571,15 +600,14 @@ xfs_inobt_count_blocks(
xfs_btnum_t btnum,
xfs_extlen_t *tree_blocks)
{
- struct xfs_buf *agbp;
- struct xfs_btree_cur *cur;
+ struct xfs_buf *agbp = NULL;
+ struct xfs_btree_cur *cur = NULL;
int error;
- error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp);
if (error)
return error;
- cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
error = xfs_btree_count_blocks(cur, tree_blocks);
xfs_btree_del_cursor(cur, error);
xfs_trans_brelse(tp, agbp);
@@ -619,5 +647,5 @@ xfs_iallocbt_calc_size(
struct xfs_mount *mp,
unsigned long long len)
{
- return xfs_btree_calc_size(mp->m_inobt_mnr, len);
+ return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len);
}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index ebdd0c6b8766..951305ecaae1 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
+int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno, xfs_btnum_t btnum,
+ struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index bc690f2409fa..27aa3f2bc4bc 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -3,18 +3,14 @@
* Copyright (c) 2017 Christoph Hellwig.
*/
-#include <linux/cache.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_bmap.h"
#include "xfs_trace.h"
/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e021d5133ccb..28ab3c5255e1 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -10,11 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_cksum.h"
#include "xfs_icache.h"
#include "xfs_trans.h"
#include "xfs_ialloc.h"
@@ -33,12 +31,9 @@ xfs_inobp_check(
xfs_buf_t *bp)
{
int i;
- int j;
xfs_dinode_t *dip;
- j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-
- for (i = 0; i < j; i++) {
+ for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
if (!dip->di_next_unlinked) {
xfs_alert(mp,
@@ -80,7 +75,7 @@ xfs_inode_buf_verify(
struct xfs_buf *bp,
bool readahead)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_agnumber_t agno;
int i;
int ni;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f9acf1d436f6..bf3e04018246 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -3,10 +3,10 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/log2.h>
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -19,12 +19,10 @@
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_attr_sf.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
-#include "xfs_shared.h"
kmem_zone_t *xfs_ifork_zone;
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 1b542ec11d5d..7f55eb3f3653 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -12,9 +12,7 @@
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_trans_space.h"
-#include "xfs_inode.h"
#include "xfs_da_btree.h"
-#include "xfs_attr_leaf.h"
#include "xfs_bmap_btree.h"
/*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 542aa1475b5f..51bb9bdb0e84 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -9,7 +9,6 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
@@ -19,7 +18,6 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_refcount.h"
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 5d9de9b21726..38529dbacd55 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -12,12 +12,10 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
-#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_rmap.h"
@@ -203,7 +201,7 @@ STATIC xfs_failaddr_t
xfs_refcountbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 8ed885507dd8..e6aeb390b2fb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -10,24 +10,17 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_extent_busy.h"
-#include "xfs_bmap.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
/*
* Lookup the first record less than or equal to [bno, len, owner, offset]
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 5d1f8884c888..fc78efa52c94 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -9,18 +9,14 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_ag_resv.h"
@@ -292,7 +288,7 @@ static xfs_failaddr_t
xfs_rmapbt_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
struct xfs_perag *pag = bp->b_pag;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index eaaff67e9626..8ea1efc97b41 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -13,15 +13,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
-#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
-#include "xfs_icache.h"
#include "xfs_rtalloc.h"
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index e76a3e5d28d7..a08dd8f40346 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -10,26 +10,19 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_log.h"
#include "xfs_rmap_btree.h"
-#include "xfs_bmap.h"
#include "xfs_refcount_btree.h"
#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_health.h"
/*
@@ -686,7 +679,7 @@ xfs_sb_read_verify(
struct xfs_buf *bp)
{
struct xfs_sb sb;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
int error;
@@ -752,7 +745,7 @@ xfs_sb_write_verify(
struct xfs_buf *bp)
{
struct xfs_sb sb;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
int error;
@@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
*
* Mount initialization code establishing various mount
* fields from the superblock associated with the given
- * mount structure
+ * mount structure.
+ *
+ * Inode geometry are calculated in xfs_ialloc_setup_geometry.
*/
void
xfs_sb_mount_common(
- struct xfs_mount *mp,
- struct xfs_sb *sbp)
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
{
mp->m_agfrotor = mp->m_agirotor = 0;
mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -813,7 +808,6 @@ xfs_sb_mount_common(
mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
- mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
mp->m_blockmask = sbp->sb_blocksize - 1;
mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
mp->m_blockwmask = mp->m_blockwsize - 1;
@@ -823,11 +817,6 @@ xfs_sb_mount_common(
mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
- mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
- mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
- mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
- mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
-
mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
@@ -844,14 +833,6 @@ xfs_sb_mount_common(
mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
- mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
- sbp->sb_inopblock);
- mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
-
- if (sbp->sb_spino_align)
- mp->m_ialloc_min_blks = sbp->sb_spino_align;
- else
- mp->m_ialloc_min_blks = mp->m_ialloc_blks;
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
}
@@ -939,7 +920,7 @@ xfs_log_sb(
struct xfs_trans *tp)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
+ struct xfs_buf *bp = xfs_trans_getsb(tp, mp);
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1005,7 +986,7 @@ xfs_update_secondary_sbs(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
- XFS_FSS_TO_BB(mp, 1), 0);
+ XFS_FSS_TO_BB(mp, 1));
/*
* If we get an error reading or writing alternate superblocks,
* continue. xfs_repair chooses the "best" superblock based
@@ -1069,7 +1050,7 @@ xfs_sync_sb_buf(
if (error)
return error;
- bp = xfs_trans_getsb(tp, mp, 0);
+ bp = xfs_trans_getsb(tp, mp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
xfs_trans_set_sync(tp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 4e909791aeac..e0641b7337b3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -65,7 +65,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */
-#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */
/*
* LOWMODE is used by the allocator to activate the lowspace algorithm - when
* free space is running low the extent allocator may choose to allocate an
@@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_inode *ip, struct xfs_ifork *ifp);
xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
+/* Computed inode geometry for the filesystem. */
+struct xfs_ino_geometry {
+ /* Maximum inode count in this filesystem. */
+ uint64_t maxicount;
+
+ /* Actual inode cluster buffer size, in bytes. */
+ unsigned int inode_cluster_size;
+
+ /*
+ * Desired inode cluster buffer size, in bytes. This value is not
+ * rounded up to at least one filesystem block, which is necessary for
+ * the sole purpose of validating sb_spino_align. Runtime code must
+ * only ever use inode_cluster_size.
+ */
+ unsigned int inode_cluster_size_raw;
+
+ /* Inode cluster sizes, adjusted to be at least 1 fsb. */
+ unsigned int inodes_per_cluster;
+ unsigned int blocks_per_cluster;
+
+ /* Inode cluster alignment. */
+ unsigned int cluster_align;
+ unsigned int cluster_align_inodes;
+ unsigned int inoalign_mask; /* mask sb_inoalignmt if used */
+
+ unsigned int inobt_mxr[2]; /* max inobt btree records */
+ unsigned int inobt_mnr[2]; /* min inobt btree records */
+ unsigned int inobt_maxlevels; /* max inobt btree levels. */
+
+ /* Size of inode allocations under normal operation. */
+ unsigned int ialloc_inos;
+ unsigned int ialloc_blks;
+
+ /* Minimum inode blocks for a sparse allocation. */
+ unsigned int ialloc_min_blks;
+
+ /* stripe unit inode alignment */
+ unsigned int ialloc_align;
+
+ unsigned int agino_log; /* #bits for agino in inum */
+};
+
+/* Keep iterating the data structure. */
+#define XFS_ITER_CONTINUE (0)
+
+/* Stop iterating the data structure. */
+#define XFS_ITER_ABORT (1)
+
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index a0ccc253c43d..3b8260ca7d1b 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -11,12 +11,8 @@
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_error.h"
-#include "xfs_trace.h"
-#include "xfs_symlink.h"
-#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
@@ -90,7 +86,7 @@ static xfs_failaddr_t
xfs_symlink_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dsymlink_hdr *dsl = bp->b_addr;
if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -116,7 +112,7 @@ static void
xfs_symlink_read_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
/* no verification of non-crc buffers */
@@ -136,7 +132,7 @@ static void
xfs_symlink_write_verify(
struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_buf_log_item *bip = bp->b_log_item;
xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 83f4ee2afc49..d12bbd526e7c 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -15,12 +15,10 @@
#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
-#include "xfs_trace.h"
#define _ALLOC true
#define _FREE false
@@ -136,9 +134,10 @@ STATIC uint
xfs_calc_inobt_res(
struct xfs_mount *mp)
{
- return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
- XFS_FSB_TO_B(mp, 1));
+ return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
+ XFS_FSB_TO_B(mp, 1)) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
+ XFS_FSB_TO_B(mp, 1));
}
/*
@@ -167,7 +166,7 @@ xfs_calc_finobt_res(
* includes:
*
* the allocation btrees: 2 trees * (max depth - 1) * block size
- * the inode chunk: m_ialloc_blks * N
+ * the inode chunk: m_ino_geo.ialloc_blks * N
*
* The size N of the inode chunk reservation depends on whether it is for
* allocation or free and which type of create transaction is in use. An inode
@@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res(
size = XFS_FSB_TO_B(mp, 1);
}
- res += xfs_calc_buf_res(mp->m_ialloc_blks, size);
+ res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
return res;
}
@@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ 2 * M_IGEO(mp)->inode_cluster_size;
}
/*
@@ -345,7 +344,7 @@ STATIC uint
xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
{
return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
- max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+ M_IGEO(mp)->inode_cluster_size;
}
/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index a62fb950bef1..88221c7a04cc 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -56,9 +56,9 @@
#define XFS_DIRREMOVE_SPACE_RES(mp) \
XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
#define XFS_IALLOC_SPACE_RES(mp) \
- ((mp)->m_ialloc_blks + \
+ (M_IGEO(mp)->ialloc_blks + \
(xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
- ((mp)->m_in_maxlevels - 1)))
+ (M_IGEO(mp)->inobt_maxlevels - 1)))
/*
* Space reservation values for various transactions.
@@ -94,7 +94,8 @@
#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
#define XFS_IFREE_SPACE_RES(mp) \
- (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
+ (xfs_sb_version_hasfinobt(&mp->m_sb) ? \
+ M_IGEO(mp)->inobt_maxlevels : 0)
#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index d51acc95bc00..4f595546a639 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -7,19 +7,10 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
/* Find the size of the AG, in blocks. */
xfs_agblock_t
@@ -87,14 +78,14 @@ xfs_agino_range(
* Calculate the first inode, which will be in the first
* cluster-aligned block after the AGFL.
*/
- bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align);
+ bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
*first = XFS_AGB_TO_AGINO(mp, bno);
/*
* Calculate the last inode, which will be at the end of the
* last (aligned) cluster that can be allocated in the AG.
*/
- bno = round_down(eoag, mp->m_cluster_align);
+ bno = round_down(eoag, M_IGEO(mp)->cluster_align);
*last = XFS_AGB_TO_AGINO(mp, bno) - 1;
}
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index adaeabdefdd3..16b09b941441 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -9,20 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Superblock */
@@ -646,7 +639,7 @@ xchk_agfl_block(
xchk_agfl_block_xref(sc, agbno);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return XFS_ITER_ABORT;
return 0;
}
@@ -737,7 +730,7 @@ xchk_agfl(
/* Check the blocks in the AGFL. */
error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
sc->sa.agfl_bp, xchk_agfl_block, &sai);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ if (error == XFS_ITER_ABORT) {
error = 0;
goto out_free;
}
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 64e31f87d490..7a1a38b636a9 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -9,22 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 44883e9112ad..a43d1813c4ff 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub free space btrees.
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index dce74ec57038..1afc58bf71dd 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -9,26 +9,62 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
-#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/dabtree.h"
-#include "scrub/trace.h"
+#include "scrub/attr.h"
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
+/*
+ * Allocate enough memory to hold an attr value and attr block bitmaps,
+ * reallocating the buffer if necessary. Buffer contents are not preserved
+ * across a reallocation.
+ */
+int
+xchk_setup_xattr_buf(
+ struct xfs_scrub *sc,
+ size_t value_size,
+ xfs_km_flags_t flags)
+{
+ size_t sz;
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ /*
+ * We need enough space to read an xattr value from the file or enough
+ * space to hold three copies of the xattr free space bitmap. We don't
+ * need the buffer space for both purposes at the same time.
+ */
+ sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+ sz = max_t(size_t, sz, value_size);
+
+ /*
+ * If there's already a buffer, figure out if we need to reallocate it
+ * to accommodate a larger size.
+ */
+ if (ab) {
+ if (sz <= ab->sz)
+ return 0;
+ kmem_free(ab);
+ sc->buf = NULL;
+ }
+
+ /*
+ * Don't zero the buffer upon allocation to avoid runtime overhead.
+ * All users must be careful never to read uninitialized contents.
+ */
+ ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
+ if (!ab)
+ return -ENOMEM;
+
+ ab->sz = sz;
+ sc->buf = ab;
+ return 0;
+}
/* Set us up to scrub an inode's extended attributes. */
int
@@ -36,19 +72,18 @@ xchk_setup_xattr(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
- size_t sz;
+ int error;
/*
- * Allocate the buffer without the inode lock held. We need enough
- * space to read every xattr value in the file or enough space to
- * hold three copies of the xattr free space bitmap. (Not both at
- * the same time.)
+ * We failed to get memory while checking attrs, so this time try to
+ * get all the memory we're ever going to need. Allocate the buffer
+ * without the inode lock held, which means we can sleep.
*/
- sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) *
- BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
- sc->buf = kmem_zalloc_large(sz, KM_SLEEP);
- if (!sc->buf)
- return -ENOMEM;
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
+ if (error)
+ return error;
+ }
return xchk_setup_inode_contents(sc, ip, 0);
}
@@ -83,7 +118,7 @@ xchk_xattr_listent(
sx = container_of(context, struct xchk_xattr, context);
if (xchk_should_terminate(sx->sc, &error)) {
- context->seen_enough = 1;
+ context->seen_enough = error;
return;
}
@@ -99,6 +134,19 @@ xchk_xattr_listent(
return;
}
+ /*
+ * Try to allocate enough memory to extrat the attr value. If that
+ * doesn't work, we overload the seen_enough variable to convey
+ * the error message back to the main scrub function.
+ */
+ error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ error = -EDEADLOCK;
+ if (error) {
+ context->seen_enough = error;
+ return;
+ }
+
args.flags = ATTR_KERNOTIME;
if (flags & XFS_ATTR_ROOT)
args.flags |= ATTR_ROOT;
@@ -111,8 +159,8 @@ xchk_xattr_listent(
args.namelen = namelen;
args.hashval = xfs_da_hashname(args.name, args.namelen);
args.trans = context->tp;
- args.value = sx->sc->buf;
- args.valuelen = XATTR_SIZE_MAX;
+ args.value = xchk_xattr_valuebuf(sx->sc);
+ args.valuelen = valuelen;
error = xfs_attr_get_ilocked(context->dp, &args);
if (error == -EEXIST)
@@ -125,7 +173,7 @@ xchk_xattr_listent(
args.blkno);
fail_xref:
if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- context->seen_enough = 1;
+ context->seen_enough = XFS_ITER_ABORT;
return;
}
@@ -170,13 +218,12 @@ xchk_xattr_check_freemap(
unsigned long *map,
struct xfs_attr3_icleaf_hdr *leafhdr)
{
- unsigned long *freemap;
- unsigned long *dstmap;
+ unsigned long *freemap = xchk_xattr_freemap(sc);
+ unsigned long *dstmap = xchk_xattr_dstmap(sc);
unsigned int mapsize = sc->mp->m_attr_geo->blksize;
int i;
/* Construct bitmap of freemap contents. */
- freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
bitmap_zero(freemap, mapsize);
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (!xchk_xattr_set_map(sc, freemap,
@@ -186,7 +233,6 @@ xchk_xattr_check_freemap(
}
/* Look for bits that are set in freemap and are marked in use. */
- dstmap = freemap + BITS_TO_LONGS(mapsize);
return bitmap_and(dstmap, freemap, map, mapsize) == 0;
}
@@ -201,13 +247,13 @@ xchk_xattr_entry(
char *buf_end,
struct xfs_attr_leafblock *leaf,
struct xfs_attr3_icleaf_hdr *leafhdr,
- unsigned long *usedmap,
struct xfs_attr_leaf_entry *ent,
int idx,
unsigned int *usedbytes,
__u32 *last_hashval)
{
struct xfs_mount *mp = ds->state->mp;
+ unsigned long *usedmap = xchk_xattr_usedmap(ds->sc);
char *name_end;
struct xfs_attr_leaf_name_local *lentry;
struct xfs_attr_leaf_name_remote *rentry;
@@ -267,16 +313,26 @@ xchk_xattr_block(
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *ent;
struct xfs_attr_leaf_entry *entries;
- unsigned long *usedmap = ds->sc->buf;
+ unsigned long *usedmap;
char *buf_end;
size_t off;
__u32 last_hashval = 0;
unsigned int usedbytes = 0;
unsigned int hdrsize;
int i;
+ int error;
if (*last_checked == blk->blkno)
return 0;
+
+ /* Allocate memory for block usage checking. */
+ error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+ usedmap = xchk_xattr_usedmap(ds->sc);
+
*last_checked = blk->blkno;
bitmap_zero(usedmap, mp->m_attr_geo->blksize);
@@ -324,7 +380,7 @@ xchk_xattr_block(
/* Check the entry and nameval. */
xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
- usedmap, ent, i, &usedbytes, &last_hashval);
+ ent, i, &usedbytes, &last_hashval);
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
@@ -464,6 +520,10 @@ xchk_xattr(
error = xfs_attr_list_int_ilocked(&sx.context);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
goto out;
+
+ /* Did our listent function try to return any errors? */
+ if (sx.context.seen_enough < 0)
+ error = sx.context.seen_enough;
out:
return error;
}
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
new file mode 100644
index 000000000000..13a1d2e8424d
--- /dev/null
+++ b/fs/xfs/scrub/attr.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SCRUB_ATTR_H__
+#define __XFS_SCRUB_ATTR_H__
+
+/*
+ * Temporary storage for online scrub and repair of extended attributes.
+ */
+struct xchk_xattr_buf {
+ /* Size of @buf, in bytes. */
+ size_t sz;
+
+ /*
+ * Memory buffer -- either used for extracting attr values while
+ * walking the attributes; or for computing attr block bitmaps when
+ * checking the attribute tree.
+ *
+ * Each bitmap contains enough bits to track every byte in an attr
+ * block (rounded up to the size of an unsigned long). The attr block
+ * used space bitmap starts at the beginning of the buffer; the free
+ * space bitmap follows immediately after; and we have a third buffer
+ * for storing intermediate bitmap results.
+ */
+ uint8_t buf[0];
+};
+
+/* A place to store attribute values. */
+static inline uint8_t *
+xchk_xattr_valuebuf(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return ab->buf;
+}
+
+/* A bitmap of space usage computed by walking an attr leaf block. */
+static inline unsigned long *
+xchk_xattr_usedmap(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return (unsigned long *)ab->buf;
+}
+
+/* A bitmap of free space computed by walking attr leaf block free info. */
+static inline unsigned long *
+xchk_xattr_freemap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_usedmap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+/* A bitmap used to hold temporary results. */
+static inline unsigned long *
+xchk_xattr_dstmap(
+ struct xfs_scrub *sc)
+{
+ return xchk_xattr_freemap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
+}
+
+int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
+ xfs_km_flags_t flags);
+
+#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index fdadc9e1dc49..3d47d111be5a 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,11 +10,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
-#include "scrub/xfs_scrub.h"
-#include "scrub/scrub.h"
-#include "scrub/common.h"
-#include "scrub/trace.h"
-#include "scrub/repair.h"
#include "scrub/bitmap.h"
/*
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index a703cd58a90e..1bd29fdc2ab5 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -9,27 +9,19 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/* Set us up with an inode's bmap. */
int
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 117910db51b8..f52a7b8256f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -9,14 +9,7 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 973aa59975e3..18876056e5e0 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -9,22 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
@@ -32,11 +26,9 @@
#include "xfs_trans_priv.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
#include "scrub/health.h"
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 90527b094878..94c4f1de1922 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -9,20 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_attr_leaf.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index a38a22785a1a..1e2e11721eb9 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -9,24 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
#include "scrub/dabtree.h"
/* Set us up to scrub directories. */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 07c11e3e6437..fc3f510c9034 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -9,22 +9,10 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
-#include "xfs_rmap.h"
-#include "xfs_error.h"
-#include "xfs_errortag.h"
-#include "xfs_icache.h"
#include "xfs_health.h"
-#include "xfs_bmap.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 23cf8e2f25db..b2f602811e9d 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -7,18 +7,10 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
-#include "xfs_inode.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
-#include "scrub/health.h"
/*
* Scrub and In-Core Filesystem Health Assessments
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 9b47117180cb..681758704fda 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -9,21 +9,14 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster(
int error = 0;
nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
- mp->m_inodes_per_cluster);
+ M_IGEO(mp)->inodes_per_cluster);
/* Map this inode cluster */
agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
@@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster(
*/
ir_holemask = (irec->ir_holemask & cluster_mask);
imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
- imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
+ imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
mp->m_sb.sb_inodelog;
@@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster(
/* If any part of this is a hole, skip it. */
if (ir_holemask) {
xchk_xref_is_not_owned_by(bs->sc, agbno,
- mp->m_blocks_per_cluster,
+ M_IGEO(mp)->blocks_per_cluster,
&XFS_RMAP_OINFO_INODES);
return 0;
}
- xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster,
+ xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
&XFS_RMAP_OINFO_INODES);
/* Grab the inode cluster buffer. */
@@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters(
*/
for (cluster_base = 0;
cluster_base < XFS_INODES_PER_CHUNK;
- cluster_base += bs->sc->mp->m_inodes_per_cluster) {
+ cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
if (error)
break;
@@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment(
{
struct xfs_mount *mp = bs->sc->mp;
struct xchk_iallocbt *iabt = bs->private;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
/*
* finobt records have different positioning requirements than inobt
@@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment(
unsigned int imask;
imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
- mp->m_cluster_align_inodes) - 1;
+ igeo->cluster_align_inodes) - 1;
if (irec->ir_startino & imask)
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
@@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment(
}
/* inobt records must be aligned to cluster and inoalignmnt size. */
- if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) {
+ if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
}
- if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) {
+ if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return;
}
- if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK)
+ if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
return;
/*
@@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment(
* after this one.
*/
iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
- iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster;
+ iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
}
/* Scrub an inobt/finobt record. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index e213efc194a1..6d483ab29e63 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -9,27 +9,17 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_inode_buf.h"
-#include "xfs_inode_fork.h"
#include "xfs_ialloc.h"
#include "xfs_da_format.h"
#include "xfs_reflink.h"
#include "xfs_rmap.h"
-#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Grab total control of the inode metadata. It doesn't matter here if
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index d5d197f1b80f..c962bd534690 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -9,21 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_ialloc.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub parents. */
int
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 5dfe2b5924db..0a33b4421c32 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -9,24 +9,13 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_dquot.h"
-#include "xfs_dquot_item.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Convert a scrub type code to a DQ flag, or return 0 if error. */
static inline uint
@@ -144,7 +133,7 @@ xchk_quota_item(
if (bsoft > bhard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
- if (ihard > mp->m_maxicount)
+ if (ihard > M_IGEO(mp)->maxicount)
xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
if (isoft > ihard)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 708b4158eb90..93b3793bc5b3 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,22 +7,12 @@
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reference count btrees.
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index eb358f0f5e0a..4cfeec57fb05 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -9,29 +9,21 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
#include "xfs_alloc.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
-#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_extent_busy.h"
#include "xfs_ag_resv.h"
-#include "xfs_trans_space.h"
#include "xfs_quota.h"
-#include "xfs_attr.h"
-#include "xfs_reflink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -357,7 +349,7 @@ xrep_init_btblock(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
XFS_FSB_TO_BB(mp, 1), 0);
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
- xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0);
+ xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
xfs_trans_log_buf(tp, bp, 0, bp->b_length);
bp->b_ops = ops;
@@ -672,7 +664,7 @@ xrep_findroot_agfl_walk(
{
xfs_agblock_t *agbno = priv;
- return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0;
+ return (*agbno == bno) ? XFS_ITER_ABORT : 0;
}
/* Does this block match the btree information passed in? */
@@ -702,7 +694,7 @@ xrep_findroot_block(
if (owner == XFS_RMAP_OWN_AG) {
error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
xrep_findroot_agfl_walk, &agbno);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == XFS_ITER_ABORT)
return 0;
if (error)
return error;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 92a140c5b55e..8d4cefd761c1 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -9,21 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_btree.h"
-#include "xfs_bit.h"
-#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
-#include "xfs_ialloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
-#include "scrub/trace.h"
/*
* Set us up to scrub reverse mapping btrees.
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index dbe115b075f7..c642bc206c41 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,19 +9,12 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up with the realtime metadata locked. */
int
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index f630389ee176..15c8c5f3f688 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -9,36 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_icache.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_refcount.h"
-#include "xfs_refcount_btree.h"
-#include "xfs_rmap.h"
-#include "xfs_rmap_btree.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
-#include "xfs_log.h"
-#include "xfs_trans_priv.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
-#include "scrub/btree.h"
#include "scrub/repair.h"
#include "scrub/health.h"
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index f7ebaa946999..99c0b1234c3c 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -9,19 +9,11 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_btree.h"
-#include "xfs_bit.h"
#include "xfs_log_format.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
#include "xfs_inode.h"
-#include "xfs_inode_fork.h"
#include "xfs_symlink.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
-#include "scrub/trace.h"
/* Set us up to scrub a symbolic link. */
int
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 96feaf8dcdec..9eaab2eb5ed3 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -10,15 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_trans.h"
-#include "xfs_bit.h"
-#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
-#include "scrub/common.h"
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8039e35147dd..cbda40d40326 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -4,16 +4,14 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11f703d4a605..761248ee2778 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -12,16 +12,11 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
-#include <linux/writeback.h>
/*
* structure owned by writepages passed to individual writepage calls
@@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc(
struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0,
- XFS_TRANS_NOFS, &tp);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
if (error)
return error;
@@ -240,9 +234,17 @@ xfs_end_ioend(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
+ unsigned int nofs_flag;
int error;
/*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
+ /*
* Just clean up the in-memory strutures if the fs has been shut down.
*/
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -282,6 +284,8 @@ done:
list_del_init(&ioend->io_list);
xfs_destroy_ioend(ioend, error);
}
+
+ memalloc_nofs_restore(nofs_flag);
}
/*
@@ -290,13 +294,9 @@ done:
static bool
xfs_ioend_can_merge(
struct xfs_ioend *ioend,
- int ioend_error,
struct xfs_ioend *next)
{
- int next_error;
-
- next_error = blk_status_to_errno(next->io_bio->bi_status);
- if (ioend_error != next_error)
+ if (ioend->io_bio->bi_status != next->io_bio->bi_status)
return false;
if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
return false;
@@ -305,11 +305,28 @@ xfs_ioend_can_merge(
return false;
if (ioend->io_offset + ioend->io_size != next->io_offset)
return false;
- if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
- return false;
return true;
}
+/*
+ * If the to be merged ioend has a preallocated transaction for file
+ * size updates we need to ensure the ioend it is merged into also
+ * has one. If it already has one we can simply cancel the transaction
+ * as it is guaranteed to be clean.
+ */
+static void
+xfs_ioend_merge_append_transactions(
+ struct xfs_ioend *ioend,
+ struct xfs_ioend *next)
+{
+ if (!ioend->io_append_trans) {
+ ioend->io_append_trans = next->io_append_trans;
+ next->io_append_trans = NULL;
+ } else {
+ xfs_setfilesize_ioend(next, -ECANCELED);
+ }
+}
+
/* Try to merge adjacent completions. */
STATIC void
xfs_ioend_try_merge(
@@ -317,25 +334,16 @@ xfs_ioend_try_merge(
struct list_head *more_ioends)
{
struct xfs_ioend *next_ioend;
- int ioend_error;
- int error;
-
- if (list_empty(more_ioends))
- return;
-
- ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
while (!list_empty(more_ioends)) {
next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
io_list);
- if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend))
+ if (!xfs_ioend_can_merge(ioend, next_ioend))
break;
list_move_tail(&next_ioend->io_list, &ioend->io_list);
ioend->io_size += next_ioend->io_size;
- if (ioend->io_append_trans) {
- error = xfs_setfilesize_ioend(next_ioend, 1);
- ASSERT(error == 1);
- }
+ if (next_ioend->io_append_trans)
+ xfs_ioend_merge_append_transactions(ioend, next_ioend);
}
}
@@ -626,7 +634,7 @@ allocate_blocks:
* reference to the ioend to ensure that the ioend completion is only done once
* all bios have been submitted and the ioend is really done.
*
- * If @fail is non-zero, it means that we have a situation where some part of
+ * If @status is non-zero, it means that we have a situation where some part of
* the submission process has failed after we have marked paged for writeback
* and unlocked them. In this situation, we need to fail the bio and ioend
* rather than submit it to IO. This typically only happens on a filesystem
@@ -638,21 +646,19 @@ xfs_submit_ioend(
struct xfs_ioend *ioend,
int status)
{
+ unsigned int nofs_flag;
+
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
/* Convert CoW extents to regular */
if (!status && ioend->io_fork == XFS_COW_FORK) {
- /*
- * Yuk. This can do memory allocation, but is not a
- * transactional operation so everything is done in GFP_KERNEL
- * context. That can deadlock, because we hold pages in
- * writeback state and GFP_KERNEL allocations can block on them.
- * Hence we must operate in nofs conditions here.
- */
- unsigned nofs_flag;
-
- nofs_flag = memalloc_nofs_save();
status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
ioend->io_offset, ioend->io_size);
- memalloc_nofs_restore(nofs_flag);
}
/* Reserve log space if we might write beyond the on-disk inode size. */
@@ -663,9 +669,10 @@ xfs_submit_ioend(
!ioend->io_append_trans)
status = xfs_setfilesize_trans_alloc(ioend);
+ memalloc_nofs_restore(nofs_flag);
+
ioend->io_bio->bi_private = ioend;
ioend->io_bio->bi_end_io = xfs_end_bio;
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
/*
* If we are failing the IO now, just mark the ioend with an
@@ -679,7 +686,6 @@ xfs_submit_ioend(
return status;
}
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
submit_bio(ioend->io_bio);
return 0;
}
@@ -691,7 +697,8 @@ xfs_alloc_ioend(
xfs_exntst_t state,
xfs_off_t offset,
struct block_device *bdev,
- sector_t sector)
+ sector_t sector,
+ struct writeback_control *wbc)
{
struct xfs_ioend *ioend;
struct bio *bio;
@@ -699,6 +706,9 @@ xfs_alloc_ioend(
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = sector;
+ bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
+ bio->bi_write_hint = inode->i_write_hint;
+ wbc_init_bio(wbc, bio);
ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
INIT_LIST_HEAD(&ioend->io_list);
@@ -719,24 +729,22 @@ xfs_alloc_ioend(
* so that the bi_private linkage is set up in the right direction for the
* traversal in xfs_destroy_ioend().
*/
-static void
+static struct bio *
xfs_chain_bio(
- struct xfs_ioend *ioend,
- struct writeback_control *wbc,
- struct block_device *bdev,
- sector_t sector)
+ struct bio *prev)
{
struct bio *new;
new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
- bio_set_dev(new, bdev);
- new->bi_iter.bi_sector = sector;
- bio_chain(ioend->io_bio, new);
- bio_get(ioend->io_bio); /* for xfs_destroy_ioend */
- ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
- ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
- submit_bio(ioend->io_bio);
- ioend->io_bio = new;
+ bio_copy_dev(new, prev);/* also copies over blkcg information */
+ new->bi_iter.bi_sector = bio_end_sector(prev);
+ new->bi_opf = prev->bi_opf;
+ new->bi_write_hint = prev->bi_write_hint;
+
+ bio_chain(prev, new);
+ bio_get(prev); /* for xfs_destroy_ioend */
+ submit_bio(prev);
+ return new;
}
/*
@@ -772,7 +780,7 @@ xfs_add_to_ioend(
if (wpc->ioend)
list_add(&wpc->ioend->io_list, iolist);
wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
- wpc->imap.br_state, offset, bdev, sector);
+ wpc->imap.br_state, offset, bdev, sector, wbc);
}
merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
@@ -783,11 +791,12 @@ xfs_add_to_ioend(
if (!merged) {
if (bio_full(wpc->ioend->io_bio, len))
- xfs_chain_bio(wpc->ioend, wbc, bdev, sector);
+ wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
bio_add_page(wpc->ioend->io_bio, page, len, poff);
}
wpc->ioend->io_size += len;
+ wbc_account_io(wbc, page, len);
}
STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f62b03186c62..45a1ea240cbb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops;
int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
-extern void xfs_count_page_state(struct page *, int *, int *);
extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 228821b2ebe0..dc93c51c17de 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -15,18 +15,13 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_inode.h"
-#include "xfs_alloc.h"
#include "xfs_attr_remote.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_trace.h"
#include "xfs_dir2.h"
-#include "xfs_defer.h"
/*
* Look at all the extents for this logical region,
@@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive(
int size;
int tmp;
int i;
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 3d213a7394c5..58fc820a70c6 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -6,25 +6,20 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_sf.h"
-#include "xfs_attr_remote.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
-#include "xfs_buf_item.h"
-#include "xfs_cksum.h"
#include "xfs_dir2.h"
STATIC int
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
new file mode 100644
index 000000000000..e2148f2d5d6b
--- /dev/null
+++ b/fs/xfs/xfs_bio_io.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+#include "xfs.h"
+
+static inline unsigned int bio_max_vecs(unsigned int count)
+{
+ return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+}
+
+int
+xfs_rw_bdev(
+ struct block_device *bdev,
+ sector_t sector,
+ unsigned int count,
+ char *data,
+ unsigned int op)
+
+{
+ unsigned int is_vmalloc = is_vmalloc_addr(data);
+ unsigned int left = count;
+ int error;
+ struct bio *bio;
+
+ if (is_vmalloc && op == REQ_OP_WRITE)
+ flush_kernel_vmap_range(data, count);
+
+ bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+ bio_set_dev(bio, bdev);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_opf = op | REQ_META | REQ_SYNC;
+
+ do {
+ struct page *page = kmem_to_page(data);
+ unsigned int off = offset_in_page(data);
+ unsigned int len = min_t(unsigned, left, PAGE_SIZE - off);
+
+ while (bio_add_page(bio, page, len, off) != len) {
+ struct bio *prev = bio;
+
+ bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
+ bio_copy_dev(bio, prev);
+ bio->bi_iter.bi_sector = bio_end_sector(prev);
+ bio->bi_opf = prev->bi_opf;
+ bio_chain(prev, bio);
+
+ submit_bio(prev);
+ }
+
+ data += len;
+ left -= len;
+ } while (left > 0);
+
+ error = submit_bio_wait(bio);
+ bio_put(bio);
+
+ if (is_vmalloc && op == REQ_OP_READ)
+ invalidate_kernel_vmap_range(data, count);
+ return error;
+}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ce45f066995e..9fa4a7ee8cfc 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -9,17 +9,16 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_bmap_item.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
-#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
@@ -96,15 +95,6 @@ xfs_bui_item_format(
}
/*
- * Pinning has no meaning for an bui item, so just return.
- */
-STATIC void
-xfs_bui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an BUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the BUI transaction has been successfully committed to make it
@@ -123,71 +113,22 @@ xfs_bui_item_unpin(
}
/*
- * BUI items have no locking or pushing. However, since BUIs are pulled from
- * the AIL when their corresponding BUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the BUI out of
- * the AIL.
- */
-STATIC uint
-xfs_bui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The BUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an BUD isn't going to be
* constructed and thus we free the BUI here directly.
*/
STATIC void
-xfs_bui_item_unlock(
+xfs_bui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_bui_release(BUI_ITEM(lip));
-}
-
-/*
- * The BUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_bui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
+ xfs_bui_release(BUI_ITEM(lip));
}
-/*
- * The BUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_bui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all bui log items.
- */
static const struct xfs_item_ops xfs_bui_item_ops = {
.iop_size = xfs_bui_item_size,
.iop_format = xfs_bui_item_format,
- .iop_pin = xfs_bui_item_pin,
.iop_unpin = xfs_bui_item_unpin,
- .iop_unlock = xfs_bui_item_unlock,
- .iop_committed = xfs_bui_item_committed,
- .iop_push = xfs_bui_item_push,
- .iop_committing = xfs_bui_item_committing,
+ .iop_release = xfs_bui_item_release,
};
/*
@@ -249,126 +190,241 @@ xfs_bud_item_format(
}
/*
- * Pinning has no meaning for an bud item, so just return.
+ * The BUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the BUI and free the
+ * BUD.
*/
STATIC void
-xfs_bud_item_pin(
+xfs_bud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+
+ xfs_bui_release(budp->bud_buip);
+ kmem_zone_free(xfs_bud_zone, budp);
}
-/*
- * Since pinning has no meaning for an bud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_bud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_bud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_bud_item_size,
+ .iop_format = xfs_bud_item_format,
+ .iop_release = xfs_bud_item_release,
+};
+
+static struct xfs_bud_log_item *
+xfs_trans_get_bud(
+ struct xfs_trans *tp,
+ struct xfs_bui_log_item *buip)
{
+ struct xfs_bud_log_item *budp;
+
+ budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
+ &xfs_bud_item_ops);
+ budp->bud_buip = buip;
+ budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+
+ xfs_trans_add_item(tp, &budp->bud_item);
+ return budp;
}
/*
- * There isn't much you can do to push on an bud item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish an bmap update and log it to the BUD. Note that the
+ * transaction is marked dirty regardless of whether the bmap update
+ * succeeds or fails to support the BUI/BUD lifecycle rules.
*/
-STATIC uint
-xfs_bud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_log_finish_bmap_update(
+ struct xfs_trans *tp,
+ struct xfs_bud_log_item *budp,
+ enum xfs_bmap_intent_type type,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_fileoff_t startoff,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t *blockcount,
+ xfs_exntst_t state)
{
- return XFS_ITEM_PINNED;
+ int error;
+
+ error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
+ startblock, blockcount, state);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the BUI and frees the BUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
+
+ return error;
}
-/*
- * The BUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the BUI and free the
- * BUD.
- */
-STATIC void
-xfs_bud_item_unlock(
- struct xfs_log_item *lip)
+/* Sort bmap intents by inode. */
+static int
+xfs_bmap_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_bmap_intent *ba;
+ struct xfs_bmap_intent *bb;
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
- }
+ ba = container_of(a, struct xfs_bmap_intent, bi_list);
+ bb = container_of(b, struct xfs_bmap_intent, bi_list);
+ return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
}
-/*
- * When the bud item is committed to disk, all we need to do is delete our
- * reference to our partner bui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_bud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Get an BUI. */
+STATIC void *
+xfs_bmap_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
{
- struct xfs_bud_log_item *budp = BUD_ITEM(lip);
+ struct xfs_bui_log_item *buip;
+
+ ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
+ ASSERT(tp != NULL);
+
+ buip = xfs_bui_init(tp->t_mountp);
+ ASSERT(buip != NULL);
/*
- * Drop the BUI reference regardless of whether the BUD has been
- * aborted. Once the BUD transaction is constructed, it is the sole
- * responsibility of the BUD to release the BUI (even if the BUI is
- * aborted due to log I/O error).
+ * Get a log_item_desc to point at the new item.
*/
- xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
+ xfs_trans_add_item(tp, &buip->bui_item);
+ return buip;
+}
- return (xfs_lsn_t)-1;
+/* Set the map extent flags for this mapping. */
+static void
+xfs_trans_set_bmap_flags(
+ struct xfs_map_extent *bmap,
+ enum xfs_bmap_intent_type type,
+ int whichfork,
+ xfs_exntst_t state)
+{
+ bmap->me_flags = 0;
+ switch (type) {
+ case XFS_BMAP_MAP:
+ case XFS_BMAP_UNMAP:
+ bmap->me_flags = type;
+ break;
+ default:
+ ASSERT(0);
+ }
+ if (state == XFS_EXT_UNWRITTEN)
+ bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
+ if (whichfork == XFS_ATTR_FORK)
+ bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
}
-/*
- * The BUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log bmap updates in the intent item. */
STATIC void
-xfs_bud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_bmap_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
+ struct xfs_bui_log_item *buip = intent;
+ struct xfs_bmap_intent *bmap;
+ uint next_extent;
+ struct xfs_map_extent *map;
+
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
+
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
+ ASSERT(next_extent < buip->bui_format.bui_nextents);
+ map = &buip->bui_format.bui_extents[next_extent];
+ map->me_owner = bmap->bi_owner->i_ino;
+ map->me_startblock = bmap->bi_bmap.br_startblock;
+ map->me_startoff = bmap->bi_bmap.br_startoff;
+ map->me_len = bmap->bi_bmap.br_blockcount;
+ xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
+ bmap->bi_bmap.br_state);
}
-/*
- * This is the ops vector shared by all bud log items.
- */
-static const struct xfs_item_ops xfs_bud_item_ops = {
- .iop_size = xfs_bud_item_size,
- .iop_format = xfs_bud_item_format,
- .iop_pin = xfs_bud_item_pin,
- .iop_unpin = xfs_bud_item_unpin,
- .iop_unlock = xfs_bud_item_unlock,
- .iop_committed = xfs_bud_item_committed,
- .iop_push = xfs_bud_item_push,
- .iop_committing = xfs_bud_item_committing,
-};
+/* Get an BUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_bmap_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_bud(tp, intent);
+}
-/*
- * Allocate and initialize an bud item with the given number of extents.
- */
-struct xfs_bud_log_item *
-xfs_bud_init(
- struct xfs_mount *mp,
- struct xfs_bui_log_item *buip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_bmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_bmap_intent *bmap;
+ xfs_filblks_t count;
+ int error;
+
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ count = bmap->bi_bmap.br_blockcount;
+ error = xfs_trans_log_finish_bmap_update(tp, done_item,
+ bmap->bi_type,
+ bmap->bi_owner, bmap->bi_whichfork,
+ bmap->bi_bmap.br_startoff,
+ bmap->bi_bmap.br_startblock,
+ &count,
+ bmap->bi_bmap.br_state);
+ if (!error && count > 0) {
+ ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
+ bmap->bi_bmap.br_blockcount = count;
+ return -EAGAIN;
+ }
+ kmem_free(bmap);
+ return error;
+}
+/* Abort all pending BUIs. */
+STATIC void
+xfs_bmap_update_abort_intent(
+ void *intent)
{
- struct xfs_bud_log_item *budp;
+ xfs_bui_release(intent);
+}
- budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops);
- budp->bud_buip = buip;
- budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_bmap_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_bmap_intent *bmap;
- return budp;
+ bmap = container_of(item, struct xfs_bmap_intent, bi_list);
+ kmem_free(bmap);
}
+const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
+ .max_items = XFS_BUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_bmap_update_diff_items,
+ .create_intent = xfs_bmap_update_create_intent,
+ .abort_intent = xfs_bmap_update_abort_intent,
+ .log_item = xfs_bmap_update_log_item,
+ .create_done = xfs_bmap_update_create_done,
+ .finish_item = xfs_bmap_update_finish_item,
+ .cancel_item = xfs_bmap_update_cancel_item,
+};
+
/*
* Process a bmap update intent item that was recovered from the log.
* We need to update some inode's bmbt.
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 89e043a88bb8..ad479cc73de8 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -75,8 +75,6 @@ extern struct kmem_zone *xfs_bui_zone;
extern struct kmem_zone *xfs_bud_zone;
struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
-struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
- struct xfs_bui_log_item *);
void xfs_bui_item_free(struct xfs_bui_log_item *);
void xfs_bui_release(struct xfs_bui_log_item *);
int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 06d07f1e310b..98c6a7a71427 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -12,12 +12,10 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
-#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -28,11 +26,8 @@
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_log.h"
-#include "xfs_rmap_btree.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
-#include "xfs_refcount.h"
/* Kernel only BMAP related definitions and functions */
@@ -276,7 +271,7 @@ xfs_bmap_count_tree(
struct xfs_btree_block *block, *nextblock;
int numrecs;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
return error;
@@ -287,7 +282,7 @@ xfs_bmap_count_tree(
/* Not at node above leaves, count this level of nodes */
nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
while (nextbno != NULLFSBLOCK) {
- error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
+ error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
@@ -321,7 +316,7 @@ xfs_bmap_count_tree(
if (nextbno == NULLFSBLOCK)
break;
bno = nextbno;
- error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
+ error = xfs_btree_read_bufl(mp, tp, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 548344e25128..ca0849043f54 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -4,24 +4,9 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
#include <linux/backing-dev.h>
-#include <linux/freezer.h>
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -213,7 +198,7 @@ xfs_buf_free_maps(
}
}
-struct xfs_buf *
+static struct xfs_buf *
_xfs_buf_alloc(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
@@ -243,6 +228,7 @@ _xfs_buf_alloc(
sema_init(&bp->b_sema, 0); /* held, no waiters */
spin_lock_init(&bp->b_lock);
bp->b_target = target;
+ bp->b_mount = target->bt_mount;
bp->b_flags = flags;
/*
@@ -263,12 +249,11 @@ _xfs_buf_alloc(
bp->b_maps[i].bm_len = map[i].bm_len;
bp->b_length += map[i].bm_len;
}
- bp->b_io_length = bp->b_length;
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
- XFS_STATS_INC(target->bt_mount, xb_create);
+ XFS_STATS_INC(bp->b_mount, xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
return bp;
@@ -425,12 +410,12 @@ retry:
current->comm, current->pid,
__func__, gfp_mask);
- XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
+ XFS_STATS_INC(bp->b_mount, xb_page_retries);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
- XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
+ XFS_STATS_INC(bp->b_mount, xb_page_found);
nbytes = min_t(size_t, size, PAGE_SIZE - offset);
size -= nbytes;
@@ -909,83 +894,6 @@ xfs_buf_read_uncached(
return 0;
}
-/*
- * Return a buffer allocated as an empty buffer and associated to external
- * memory via xfs_buf_associate_memory() back to it's empty state.
- */
-void
-xfs_buf_set_empty(
- struct xfs_buf *bp,
- size_t numblks)
-{
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_page_count = 0;
- bp->b_addr = NULL;
- bp->b_length = numblks;
- bp->b_io_length = numblks;
-
- ASSERT(bp->b_map_count == 1);
- bp->b_bn = XFS_BUF_DADDR_NULL;
- bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
- bp->b_maps[0].bm_len = bp->b_length;
-}
-
-static inline struct page *
-mem_to_page(
- void *addr)
-{
- if ((!is_vmalloc_addr(addr))) {
- return virt_to_page(addr);
- } else {
- return vmalloc_to_page(addr);
- }
-}
-
-int
-xfs_buf_associate_memory(
- xfs_buf_t *bp,
- void *mem,
- size_t len)
-{
- int rval;
- int i = 0;
- unsigned long pageaddr;
- unsigned long offset;
- size_t buflen;
- int page_count;
-
- pageaddr = (unsigned long)mem & PAGE_MASK;
- offset = (unsigned long)mem - pageaddr;
- buflen = PAGE_ALIGN(len + offset);
- page_count = buflen >> PAGE_SHIFT;
-
- /* Free any previous set of page pointers */
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_addr = mem;
-
- rval = _xfs_buf_get_pages(bp, page_count);
- if (rval)
- return rval;
-
- bp->b_offset = offset;
-
- for (i = 0; i < bp->b_page_count; i++) {
- bp->b_pages[i] = mem_to_page((void *)pageaddr);
- pageaddr += PAGE_SIZE;
- }
-
- bp->b_io_length = BTOBB(len);
- bp->b_length = BTOBB(buflen);
-
- return 0;
-}
-
xfs_buf_t *
xfs_buf_get_uncached(
struct xfs_buftarg *target,
@@ -1180,7 +1088,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_target->bt_mount, 0);
+ xfs_log_force(bp->b_mount, 0);
down(&bp->b_sema);
trace_xfs_buf_lock_done(bp, _RET_IP_);
@@ -1269,7 +1177,7 @@ xfs_buf_ioend_async(
struct xfs_buf *bp)
{
INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
- queue_work(bp->b_ioend_wq, &bp->b_ioend_work);
+ queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
}
void
@@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert(
struct xfs_buf *bp,
const char *func)
{
- xfs_alert(bp->b_target->bt_mount,
+ xfs_alert(bp->b_mount,
"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
-bp->b_error);
@@ -1307,10 +1215,8 @@ xfs_bwrite(
XBF_WRITE_FAIL | XBF_DONE);
error = xfs_buf_submit(bp);
- if (error) {
- xfs_force_shutdown(bp->b_target->bt_mount,
- SHUTDOWN_META_IO_ERROR);
- }
+ if (error)
+ xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
return error;
}
@@ -1436,21 +1342,8 @@ _xfs_buf_ioapply(
*/
bp->b_error = 0;
- /*
- * Initialize the I/O completion workqueue if we haven't yet or the
- * submitter has not opted to specify a custom one.
- */
- if (!bp->b_ioend_wq)
- bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
-
if (bp->b_flags & XBF_WRITE) {
op = REQ_OP_WRITE;
- if (bp->b_flags & XBF_SYNCIO)
- op_flags = REQ_SYNC;
- if (bp->b_flags & XBF_FUA)
- op_flags |= REQ_FUA;
- if (bp->b_flags & XBF_FLUSH)
- op_flags |= REQ_PREFLUSH;
/*
* Run the write verifier callback function if it exists. If
@@ -1460,12 +1353,12 @@ _xfs_buf_ioapply(
if (bp->b_ops) {
bp->b_ops->verify_write(bp);
if (bp->b_error) {
- xfs_force_shutdown(bp->b_target->bt_mount,
+ xfs_force_shutdown(bp->b_mount,
SHUTDOWN_CORRUPT_INCORE);
return;
}
} else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
/*
* non-crc filesystems don't attach verifiers during
@@ -1497,7 +1390,7 @@ _xfs_buf_ioapply(
* subsequent call.
*/
offset = bp->b_offset;
- size = BBTOB(bp->b_io_length);
+ size = BBTOB(bp->b_length);
blk_start_plug(&plug);
for (i = 0; i < bp->b_map_count; i++) {
xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
@@ -1543,7 +1436,7 @@ __xfs_buf_submit(
ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
/* on shutdown we stale and complete the buffer immediately */
- if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+ if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
xfs_buf_ioerror(bp, -EIO);
bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
@@ -1613,16 +1506,11 @@ xfs_buf_offset(
return page_address(page) + (offset & (PAGE_SIZE-1));
}
-/*
- * Move data into or out of a buffer.
- */
void
-xfs_buf_iomove(
- xfs_buf_t *bp, /* buffer to process */
- size_t boff, /* starting buffer offset */
- size_t bsize, /* length to copy */
- void *data, /* data address */
- xfs_buf_rw_t mode) /* read/write/zero flag */
+xfs_buf_zero(
+ struct xfs_buf *bp,
+ size_t boff,
+ size_t bsize)
{
size_t bend;
@@ -1635,23 +1523,13 @@ xfs_buf_iomove(
page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
page = bp->b_pages[page_index];
csize = min_t(size_t, PAGE_SIZE - page_offset,
- BBTOB(bp->b_io_length) - boff);
+ BBTOB(bp->b_length) - boff);
ASSERT((csize + page_offset) <= PAGE_SIZE);
- switch (mode) {
- case XBRW_ZERO:
- memset(page_address(page) + page_offset, 0, csize);
- break;
- case XBRW_READ:
- memcpy(data, page_address(page) + page_offset, csize);
- break;
- case XBRW_WRITE:
- memcpy(page_address(page) + page_offset, data, csize);
- }
+ memset(page_address(page) + page_offset, 0, csize);
boff += csize;
- data += csize;
}
}
@@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
* This allows userspace to disrupt buffer caching for debug/testing
* purposes.
*/
- if (XFS_TEST_ERROR(false, bp->b_target->bt_mount,
- XFS_ERRTAG_BUF_LRU_REF))
+ if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
lru_ref = 0;
atomic_set(&bp->b_lru_ref, lru_ref);
@@ -2215,7 +2092,7 @@ xfs_verify_magic(
struct xfs_buf *bp,
__be32 dmagic)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
@@ -2233,7 +2110,7 @@ xfs_verify_magic16(
struct xfs_buf *bp,
__be16 dmagic)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d0b96e071cec..c6e57a3f409e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,12 +21,6 @@
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-typedef enum {
- XBRW_READ = 1, /* transfer into target memory */
- XBRW_WRITE = 2, /* transfer from target memory */
- XBRW_ZERO = 3, /* Zero target memory */
-} xfs_buf_rw_t;
-
#define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
@@ -34,12 +28,7 @@ typedef enum {
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
-#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */
-
-/* I/O hints for the BIO layer */
-#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
-#define XBF_FUA (1 << 11)/* force cache write through mode */
-#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
+#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */
/* flags used only as arguments to access routines */
#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
@@ -49,7 +38,6 @@ typedef enum {
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
-#define _XBF_COMPOUND (1 << 23)/* compound buffer */
typedef unsigned int xfs_buf_flags_t;
@@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_DONE, "DONE" }, \
{ XBF_STALE, "STALE" }, \
{ XBF_WRITE_FAIL, "WRITE_FAIL" }, \
- { XBF_SYNCIO, "SYNCIO" }, \
- { XBF_FUA, "FUA" }, \
- { XBF_FLUSH, "FLUSH" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
- { _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_COMPOUND, "COMPOUND" }
+ { _XBF_DELWRI_Q, "DELWRI_Q" }
/*
@@ -161,13 +145,13 @@ typedef struct xfs_buf {
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */
+ struct xfs_mount *b_mount;
xfs_buftarg_t *b_target; /* buffer target (device) */
void *b_addr; /* virtual address of buffer */
struct work_struct b_ioend_work;
- struct workqueue_struct *b_ioend_wq; /* I/O completion wq */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
struct completion b_iowait; /* queue for I/O waiters */
- void *b_log_item;
+ struct xfs_buf_log_item *b_log_item;
struct list_head b_li_list; /* Log items list head */
struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
@@ -175,7 +159,6 @@ typedef struct xfs_buf {
struct xfs_buf_map *b_maps; /* compound buffer map */
struct xfs_buf_map __b_map; /* inline compound buffer map */
int b_map_count;
- int b_io_length; /* IO size in BBs */
atomic_t b_pin_count; /* pin count */
atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
@@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
xfs_daddr_t blkno, size_t numblks,
xfs_buf_flags_t flags);
-struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
- struct xfs_buf_map *map, int nmaps,
- xfs_buf_flags_t flags);
-
-static inline struct xfs_buf *
-xfs_buf_alloc(
- struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- size_t numblks,
- xfs_buf_flags_t flags)
-{
- DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return _xfs_buf_alloc(target, &map, 1, flags);
-}
-
struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps,
xfs_buf_flags_t flags);
@@ -239,11 +207,10 @@ static inline struct xfs_buf *
xfs_buf_get(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
- size_t numblks,
- xfs_buf_flags_t flags)
+ size_t numblks)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return xfs_buf_get_map(target, &map, 1, flags);
+ return xfs_buf_get_map(target, &map, 1, 0);
}
static inline struct xfs_buf *
@@ -269,9 +236,6 @@ xfs_buf_readahead(
return xfs_buf_readahead_map(target, &map, 1, ops);
}
-void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
-int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
-
struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
int flags);
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
@@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
return __xfs_buf_submit(bp, wait);
}
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
- xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
+void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
/* Buffer Utility Routines */
extern void *xfs_buf_offset(struct xfs_buf *, size_t);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 65b32acfa0f6..7dcaec54a20b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -5,19 +5,17 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_log.h"
-#include "xfs_inode.h"
kmem_zone_t *xfs_buf_item_zone;
@@ -520,7 +518,7 @@ xfs_buf_item_push(
/* has a previous flush failed due to IO errors? */
if ((bp->b_flags & XBF_WRITE_FAIL) &&
___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
- xfs_warn(bp->b_target->bt_mount,
+ xfs_warn(bp->b_mount,
"Failing async write on buffer block 0x%llx. Retrying async write.",
(long long)bp->b_bn);
}
@@ -594,7 +592,7 @@ xfs_buf_item_put(
* free the item.
*/
STATIC void
-xfs_buf_item_unlock(
+xfs_buf_item_release(
struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
@@ -609,7 +607,7 @@ xfs_buf_item_unlock(
&lip->li_flags);
#endif
- trace_xfs_buf_item_unlock(bip);
+ trace_xfs_buf_item_release(bip);
/*
* The bli dirty state should match whether the blf has logged segments
@@ -639,6 +637,14 @@ xfs_buf_item_unlock(
xfs_buf_relse(bp);
}
+STATIC void
+xfs_buf_item_committing(
+ struct xfs_log_item *lip,
+ xfs_lsn_t commit_lsn)
+{
+ return xfs_buf_item_release(lip);
+}
+
/*
* This is called to find out where the oldest active copy of the
* buf log item in the on disk log resides now that the last log
@@ -671,25 +677,15 @@ xfs_buf_item_committed(
return lsn;
}
-STATIC void
-xfs_buf_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
- .iop_unlock = xfs_buf_item_unlock,
+ .iop_release = xfs_buf_item_release,
+ .iop_committing = xfs_buf_item_committing,
.iop_committed = xfs_buf_item_committed,
.iop_push = xfs_buf_item_push,
- .iop_committing = xfs_buf_item_committing
};
STATIC int
@@ -743,7 +739,7 @@ xfs_buf_item_init(
* this buffer. If we do already have one, there is
* nothing to do here so return.
*/
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
if (bip) {
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!bp->b_transp);
@@ -980,9 +976,9 @@ xfs_buf_item_relse(
*/
void
xfs_buf_attach_iodone(
- xfs_buf_t *bp,
- void (*cb)(xfs_buf_t *, xfs_log_item_t *),
- xfs_log_item_t *lip)
+ struct xfs_buf *bp,
+ void (*cb)(struct xfs_buf *, struct xfs_log_item *),
+ struct xfs_log_item *lip)
{
ASSERT(xfs_buf_islocked(bp));
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 90f65f891fab..4a054b11011a 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -39,7 +39,7 @@ struct xfs_buf_log_item;
* locked, and which 128 byte chunks of the buffer are dirty.
*/
struct xfs_buf_log_item {
- xfs_log_item_t bli_item; /* common item structure */
+ struct xfs_log_item bli_item; /* common item structure */
struct xfs_buf *bli_buf; /* real buffer pointer */
unsigned int bli_flags; /* misc flags */
unsigned int bli_recur; /* lock recursion count */
@@ -55,8 +55,8 @@ bool xfs_buf_item_put(struct xfs_buf_log_item *);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_attach_iodone(struct xfs_buf *,
- void(*)(struct xfs_buf *, xfs_log_item_t *),
- xfs_log_item_t *);
+ void(*)(struct xfs_buf *, struct xfs_log_item *),
+ struct xfs_log_item *);
void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 5142e64e2345..283df898dd9f 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -6,17 +6,14 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d0df0ed50f4b..8ec7aab89044 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -4,19 +4,17 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
-#include "xfs_discard.h"
#include "xfs_trace.h"
#include "xfs_log.h"
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a1af984e4913..fb1ad4483081 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -14,16 +14,12 @@
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_alloc.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
#include "xfs_trans_priv.h"
#include "xfs_qm.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
@@ -1243,7 +1239,7 @@ xfs_qm_exit(void)
/*
* Iterate every dquot of a particular type. The caller must ensure that the
* particular quota type is active. iter_fn can return negative error codes,
- * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating.
+ * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
*/
int
xfs_qm_dqiterate(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 64bd8640f6e8..4fe85709d55d 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -34,7 +34,6 @@ typedef struct xfs_dquot {
uint dq_flags; /* various flags (XFS_DQ_*) */
struct list_head q_lru; /* global free list of dquots */
struct xfs_mount*q_mount; /* filesystem this relates to */
- struct xfs_trans*q_transp; /* trans this belongs to currently */
uint q_nrefs; /* # active refs from inodes */
xfs_daddr_t q_blkno; /* blkno of dquot buffer */
int q_bufoffset; /* off of dq in buffer (# dquots) */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 7dedd17c4813..282ec5af293e 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin(
wake_up(&dqp->q_pinwait);
}
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- /*
- * We always re-log the entire dquot when it becomes dirty,
- * so, the latest copy _is_ the only one that matters.
- */
- return lsn;
-}
-
/*
* This is called to wait for the given dquot to be unpinned.
* Most of these pin/unpin routines are plagiarized from inode code.
@@ -209,14 +197,8 @@ out_unlock:
return rval;
}
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction. If the
- * hold flags is set, do not unlock the dquot.
- */
STATIC void
-xfs_qm_dquot_logitem_unlock(
+xfs_qm_dquot_logitem_release(
struct xfs_log_item *lip)
{
struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
@@ -224,11 +206,6 @@ xfs_qm_dquot_logitem_unlock(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
/*
- * Clear the transaction pointer in the dquot
- */
- dqp->q_transp = NULL;
-
- /*
* dquots are never 'held' from getting unlocked at the end of
* a transaction. Their locking and unlocking is hidden inside the
* transaction layer, within trans_commit. Hence, no LI_HOLD flag
@@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock(
xfs_dqunlock(dqp);
}
-/*
- * this needs to stamp an lsn into the dquot, I think.
- * rpc's that look at user dquot's would then have to
- * push on the dependency recorded in the dquot
- */
STATIC void
xfs_qm_dquot_logitem_committing(
struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+ xfs_lsn_t commit_lsn)
{
+ return xfs_qm_dquot_logitem_release(lip);
}
-/*
- * This is the ops vector for dquots
- */
static const struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = xfs_qm_dquot_logitem_size,
.iop_format = xfs_qm_dquot_logitem_format,
.iop_pin = xfs_qm_dquot_logitem_pin,
.iop_unpin = xfs_qm_dquot_logitem_unpin,
- .iop_unlock = xfs_qm_dquot_logitem_unlock,
- .iop_committed = xfs_qm_dquot_logitem_committed,
+ .iop_release = xfs_qm_dquot_logitem_release,
+ .iop_committing = xfs_qm_dquot_logitem_committing,
.iop_push = xfs_qm_dquot_logitem_push,
- .iop_committing = xfs_qm_dquot_logitem_committing,
.iop_error = xfs_dquot_item_error
};
@@ -320,26 +289,6 @@ xfs_qm_qoff_logitem_format(
}
/*
- * Pinning has no meaning for an quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for an quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
-}
-
-/*
* There isn't much you can do to push a quotaoff item. It is simply
* stuck waiting for the log to be flushed to disk.
*/
@@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push(
return XFS_ITEM_LOCKED;
}
-/*
- * Quotaoff items have no locking or pushing, so return failure
- * so that the caller doesn't bother with us.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
STATIC xfs_lsn_t
xfs_qm_qoffend_logitem_committed(
struct xfs_log_item *lip,
@@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed(
return (xfs_lsn_t)-1;
}
-/*
- * XXX rcc - don't know quite what to do with this. I think we can
- * just ignore it. The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen? Also, do we need different routines for
- * quotaoff start and quotaoff end? I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable). If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
-{
-}
-
static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
.iop_committed = xfs_qm_qoffend_logitem_committed,
.iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
};
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
.iop_size = xfs_qm_qoff_logitem_size,
.iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
- .iop_committed = xfs_qm_qoff_logitem_committed,
.iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
};
/*
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index db9df710a308..1aed34ccdabc 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -12,13 +12,13 @@ struct xfs_mount;
struct xfs_qoff_logitem;
typedef struct xfs_dq_logitem {
- xfs_log_item_t qli_item; /* common portion */
+ struct xfs_log_item qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
} xfs_dq_logitem_t;
typedef struct xfs_qoff_logitem {
- xfs_log_item_t qql_item; /* common portion */
+ struct xfs_log_item qql_item; /* common portion */
struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
unsigned int qql_flags;
} xfs_qoff_logitem_t;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a1e177f66404..544c9482a0ef 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -4,6 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_fs.h"
#include "xfs_log_format.h"
@@ -353,7 +354,7 @@ xfs_buf_verifier_error(
size_t bufsz,
xfs_failaddr_t failaddr)
{
- struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_mount *mp = bp->b_mount;
xfs_failaddr_t fa;
int sz;
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index f2284ceb129f..f1372f9046e3 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -4,18 +4,16 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_export.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_pnfs.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 74ddf66f4cfe..86f6512d6864 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -9,14 +9,18 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_shared.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_extfree_item.h"
#include "xfs_log.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
+#include "xfs_alloc.h"
+#include "xfs_bmap.h"
+#include "xfs_trace.h"
kmem_zone_t *xfs_efi_zone;
@@ -107,15 +111,6 @@ xfs_efi_item_format(
/*
- * Pinning has no meaning for an efi item, so just return.
- */
-STATIC void
-xfs_efi_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an EFI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the EFI transaction has been successfully committed to make it
@@ -133,71 +128,22 @@ xfs_efi_item_unpin(
}
/*
- * Efi items have no locking or pushing. However, since EFIs are pulled from
- * the AIL when their corresponding EFDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the EFI out of
- * the AIL.
- */
-STATIC uint
-xfs_efi_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The EFI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an EFD isn't going to be
* constructed and thus we free the EFI here directly.
*/
STATIC void
-xfs_efi_item_unlock(
+xfs_efi_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_efi_release(EFI_ITEM(lip));
-}
-
-/*
- * The EFI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_efi_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The EFI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_efi_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
+ xfs_efi_release(EFI_ITEM(lip));
}
-/*
- * This is the ops vector shared by all efi log items.
- */
static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
- .iop_pin = xfs_efi_item_pin,
.iop_unpin = xfs_efi_item_unpin,
- .iop_unlock = xfs_efi_item_unlock,
- .iop_committed = xfs_efi_item_committed,
- .iop_push = xfs_efi_item_push,
- .iop_committing = xfs_efi_item_committing
+ .iop_release = xfs_efi_item_release,
};
@@ -349,136 +295,298 @@ xfs_efd_item_format(
}
/*
- * Pinning has no meaning for an efd item, so just return.
+ * The EFD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the EFI and free the EFD.
*/
STATIC void
-xfs_efd_item_pin(
+xfs_efd_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+
+ xfs_efi_release(efdp->efd_efip);
+ xfs_efd_item_free(efdp);
}
+static const struct xfs_item_ops xfs_efd_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_efd_item_size,
+ .iop_format = xfs_efd_item_format,
+ .iop_release = xfs_efd_item_release,
+};
+
/*
- * Since pinning has no meaning for an efd item, unpinning does
- * not either.
+ * Allocate an "extent free done" log item that will hold nextents worth of
+ * extents. The caller must use all nextents extents, because we are not
+ * flexible about this at all.
*/
-STATIC void
-xfs_efd_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static struct xfs_efd_log_item *
+xfs_trans_get_efd(
+ struct xfs_trans *tp,
+ struct xfs_efi_log_item *efip,
+ unsigned int nextents)
{
+ struct xfs_efd_log_item *efdp;
+
+ ASSERT(nextents > 0);
+
+ if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
+ efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
+ (nextents - 1) * sizeof(struct xfs_extent),
+ KM_SLEEP);
+ } else {
+ efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
+ }
+
+ xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
+ &xfs_efd_item_ops);
+ efdp->efd_efip = efip;
+ efdp->efd_format.efd_nextents = nextents;
+ efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+
+ xfs_trans_add_item(tp, &efdp->efd_item);
+ return efdp;
}
/*
- * There isn't much you can do to push on an efd item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Free an extent and log it to the EFD. Note that the transaction is marked
+ * dirty regardless of whether the extent free succeeds or fails to support the
+ * EFI/EFD lifecycle rules.
*/
-STATIC uint
-xfs_efd_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_free_extent(
+ struct xfs_trans *tp,
+ struct xfs_efd_log_item *efdp,
+ xfs_fsblock_t start_block,
+ xfs_extlen_t ext_len,
+ const struct xfs_owner_info *oinfo,
+ bool skip_discard)
{
- return XFS_ITEM_PINNED;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent *extp;
+ uint next_extent;
+ xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
+ xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
+ start_block);
+ int error;
+
+ trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
+
+ error = __xfs_free_extent(tp, start_block, ext_len,
+ oinfo, XFS_AG_RESV_NONE, skip_discard);
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the EFI and frees the EFD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
+
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+ extp->ext_start = start_block;
+ extp->ext_len = ext_len;
+ efdp->efd_next_extent++;
+
+ return error;
}
-/*
- * The EFD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the EFI and free the EFD.
- */
-STATIC void
-xfs_efd_item_unlock(
- struct xfs_log_item *lip)
+/* Sort bmap items by AG. */
+static int
+xfs_extent_free_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_extent_free_item *ra;
+ struct xfs_extent_free_item *rb;
+
+ ra = container_of(a, struct xfs_extent_free_item, xefi_list);
+ rb = container_of(b, struct xfs_extent_free_item, xefi_list);
+ return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
+}
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_efi_release(efdp->efd_efip);
- xfs_efd_item_free(efdp);
- }
+/* Get an EFI. */
+STATIC void *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_efi_log_item *efip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ efip = xfs_efi_init(tp->t_mountp, count);
+ ASSERT(efip != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ xfs_trans_add_item(tp, &efip->efi_item);
+ return efip;
}
-/*
- * When the efd item is committed to disk, all we need to do is delete our
- * reference to our partner efi item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from further
- * referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_efd_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Log a free extent to the intent item. */
+STATIC void
+xfs_extent_free_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
- struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
+ struct xfs_efi_log_item *efip = intent;
+ struct xfs_extent_free_item *free;
+ uint next_extent;
+ struct xfs_extent *extp;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
/*
- * Drop the EFI reference regardless of whether the EFD has been
- * aborted. Once the EFD transaction is constructed, it is the sole
- * responsibility of the EFD to release the EFI (even if the EFI is
- * aborted due to log I/O error).
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
*/
- xfs_efi_release(efdp->efd_efip);
- xfs_efd_item_free(efdp);
+ next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
+ ASSERT(next_extent < efip->efi_format.efi_nextents);
+ extp = &efip->efi_format.efi_extents[next_extent];
+ extp->ext_start = free->xefi_startblock;
+ extp->ext_len = free->xefi_blockcount;
+}
- return (xfs_lsn_t)-1;
+/* Get an EFD so we can process all the free extents. */
+STATIC void *
+xfs_extent_free_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_efd(tp, intent, count);
}
-/*
- * The EFD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Process a free extent. */
+STATIC int
+xfs_extent_free_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_extent_free_item *free;
+ int error;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ error = xfs_trans_free_extent(tp, done_item,
+ free->xefi_startblock,
+ free->xefi_blockcount,
+ &free->xefi_oinfo, free->xefi_skip_discard);
+ kmem_free(free);
+ return error;
+}
+
+/* Abort all pending EFIs. */
STATIC void
-xfs_efd_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_extent_free_abort_intent(
+ void *intent)
{
+ xfs_efi_release(intent);
}
-/*
- * This is the ops vector shared by all efd log items.
- */
-static const struct xfs_item_ops xfs_efd_item_ops = {
- .iop_size = xfs_efd_item_size,
- .iop_format = xfs_efd_item_format,
- .iop_pin = xfs_efd_item_pin,
- .iop_unpin = xfs_efd_item_unpin,
- .iop_unlock = xfs_efd_item_unlock,
- .iop_committed = xfs_efd_item_committed,
- .iop_push = xfs_efd_item_push,
- .iop_committing = xfs_efd_item_committing
+/* Cancel a free extent. */
+STATIC void
+xfs_extent_free_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_extent_free_item *free;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ kmem_free(free);
+}
+
+const struct xfs_defer_op_type xfs_extent_free_defer_type = {
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_extent_free_diff_items,
+ .create_intent = xfs_extent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .log_item = xfs_extent_free_log_item,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_extent_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
};
/*
- * Allocate and initialize an efd item with the given number of extents.
+ * AGFL blocks are accounted differently in the reserve pools and are not
+ * inserted into the busy extent list.
*/
-struct xfs_efd_log_item *
-xfs_efd_init(
- struct xfs_mount *mp,
- struct xfs_efi_log_item *efip,
- uint nextents)
-
+STATIC int
+xfs_agfl_free_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
{
- struct xfs_efd_log_item *efdp;
- uint size;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_efd_log_item *efdp = done_item;
+ struct xfs_extent_free_item *free;
+ struct xfs_extent *extp;
+ struct xfs_buf *agbp;
+ int error;
+ xfs_agnumber_t agno;
+ xfs_agblock_t agbno;
+ uint next_extent;
+
+ free = container_of(item, struct xfs_extent_free_item, xefi_list);
+ ASSERT(free->xefi_blockcount == 1);
+ agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
+ agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+
+ trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
+
+ error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
+ if (!error)
+ error = xfs_free_agfl_block(tp, agno, agbno, agbp,
+ &free->xefi_oinfo);
- ASSERT(nextents > 0);
- if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
- size = (uint)(sizeof(xfs_efd_log_item_t) +
- ((nextents - 1) * sizeof(xfs_extent_t)));
- efdp = kmem_zalloc(size, KM_SLEEP);
- } else {
- efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
- }
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the EFI and frees the EFD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
- xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
- efdp->efd_efip = efip;
- efdp->efd_format.efd_nextents = nextents;
- efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
+ next_extent = efdp->efd_next_extent;
+ ASSERT(next_extent < efdp->efd_format.efd_nextents);
+ extp = &(efdp->efd_format.efd_extents[next_extent]);
+ extp->ext_start = free->xefi_startblock;
+ extp->ext_len = free->xefi_blockcount;
+ efdp->efd_next_extent++;
- return efdp;
+ kmem_free(free);
+ return error;
}
+/* sub-type with special handling for AGFL deferred frees */
+const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_extent_free_diff_items,
+ .create_intent = xfs_extent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .log_item = xfs_extent_free_log_item,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_agfl_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
+};
+
/*
* Process an extent free intent item that was recovered from
* the log. We need to free the extents that it describes.
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 2a6a895ca73e..16aaab06d4ec 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -51,7 +51,7 @@ struct kmem_zone;
* AIL, so at this point both the EFI and EFD are freed.
*/
typedef struct xfs_efi_log_item {
- xfs_log_item_t efi_item;
+ struct xfs_log_item efi_item;
atomic_t efi_refcount;
atomic_t efi_next_extent;
unsigned long efi_flags; /* misc flags */
@@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item {
* have been freed.
*/
typedef struct xfs_efd_log_item {
- xfs_log_item_t efd_item;
+ struct xfs_log_item efd_item;
xfs_efi_log_item_t *efd_efip;
uint efd_next_extent;
xfs_efd_log_format_t efd_format;
@@ -79,8 +79,6 @@ extern struct kmem_zone *xfs_efi_zone;
extern struct kmem_zone *xfs_efd_zone;
xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
-xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
- uint);
int xfs_efi_copy_format(xfs_log_iovec_t *buf,
xfs_efi_log_format_t *dst_efi_fmt);
void xfs_efi_item_free(xfs_efi_log_item_t *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 916a35cae5e9..e93bacbd49ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -10,14 +10,11 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
@@ -28,9 +25,7 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
-#include <linux/dcache.h>
#include <linux/falloc.h>
-#include <linux/pagevec.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
@@ -379,6 +374,7 @@ xfs_dio_write_end_io(
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
+ unsigned int nofs_flag;
int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
@@ -395,10 +391,17 @@ xfs_dio_write_end_io(
*/
XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
+ /*
+ * We can allocate memory here while doing writeback on behalf of
+ * memory reclaim. To avoid memory allocation deadlocks set the
+ * task-wide nofs context for the following operations.
+ */
+ nofs_flag = memalloc_nofs_save();
+
if (flags & IOMAP_DIO_COW) {
error = xfs_reflink_end_cow(ip, offset, size);
if (error)
- return error;
+ goto out;
}
/*
@@ -407,8 +410,10 @@ xfs_dio_write_end_io(
* earlier allows a racing dio read to find unwritten extents before
* they are converted.
*/
- if (flags & IOMAP_DIO_UNWRITTEN)
- return xfs_iomap_write_unwritten(ip, offset, size, true);
+ if (flags & IOMAP_DIO_UNWRITTEN) {
+ error = xfs_iomap_write_unwritten(ip, offset, size, true);
+ goto out;
+ }
/*
* We need to update the in-core inode size here so that we don't end up
@@ -430,6 +435,8 @@ xfs_dio_write_end_io(
spin_unlock(&ip->i_flags_lock);
}
+out:
+ memalloc_nofs_restore(nofs_flag);
return error;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 182501373af2..574a7a8b4736 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -5,22 +5,19 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_alloc.h"
#include "xfs_mru_cache.h"
-#include "xfs_filestream.h"
#include "xfs_trace.h"
#include "xfs_ag_resv.h"
#include "xfs_trans.h"
-#include "xfs_shared.h"
struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3d76a9e35870..5a8f9641562a 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -9,16 +9,12 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_error.h"
#include "xfs_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
#include "xfs_rmap.h"
#include "xfs_alloc.h"
#include "xfs_bit.h"
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3d0e0570e3aa..3e61d0cc23f8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -11,15 +11,11 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_error.h"
-#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_fsops.h"
#include "xfs_trans_space.h"
-#include "xfs_rtalloc.h"
-#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
@@ -251,9 +247,9 @@ xfs_growfs_data(
if (mp->m_sb.sb_imax_pct) {
uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
do_div(icount, 100);
- mp->m_maxicount = XFS_FSB_TO_INO(mp, icount);
+ M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
} else
- mp->m_maxicount = 0;
+ M_IGEO(mp)->maxicount = 0;
/* Update secondary superblocks now the physical grow has completed */
error = xfs_update_secondary_sbs(mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index d0d377384120..fa55ab8b8d80 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include "xfs_sysctl.h"
/*
* Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
@@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = {
#else
.bug_on_assert = false, /* assert failures WARN() */
#endif
+#ifdef DEBUG
+ .pwork_threads = -1, /* automatic thread detection */
+#endif
};
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 4c4929f9e7bf..8e0cb05a7142 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -9,12 +9,8 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trace.h"
#include "xfs_health.h"
@@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = {
void
xfs_bulkstat_health(
struct xfs_inode *ip,
- struct xfs_bstat *bs)
+ struct xfs_bulkstat *bs)
{
const struct ioctl_sick_map *m;
unsigned int sick;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index a76b27565a18..0b0fd10a36d4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
@@ -23,8 +23,6 @@
#include "xfs_dquot.h"
#include "xfs_reflink.h"
-#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/iversion.h>
/*
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 8381d34cb102..d99a0a3e5f40 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,14 +6,9 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
-#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_icreate_item.h"
#include "xfs_log.h"
@@ -56,80 +51,18 @@ xfs_icreate_item_format(
sizeof(struct xfs_icreate_log));
}
-
-/* Pinning has no meaning for the create item, so just return. */
STATIC void
-xfs_icreate_item_pin(
+xfs_icreate_item_release(
struct xfs_log_item *lip)
{
+ kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
}
-
-/* pinning has no meaning for the create item, so just return. */
-STATIC void
-xfs_icreate_item_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
-}
-
-STATIC void
-xfs_icreate_item_unlock(
- struct xfs_log_item *lip)
-{
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- kmem_zone_free(xfs_icreate_zone, icp);
- return;
-}
-
-/*
- * Because we have ordered buffers being tracked in the AIL for the inode
- * creation, we don't need the create item after this. Hence we can free
- * the log item and return -1 to tell the caller we're done with the item.
- */
-STATIC xfs_lsn_t
-xfs_icreate_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- struct xfs_icreate_item *icp = ICR_ITEM(lip);
-
- kmem_zone_free(xfs_icreate_zone, icp);
- return (xfs_lsn_t)-1;
-}
-
-/* item can never get into the AIL */
-STATIC uint
-xfs_icreate_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- ASSERT(0);
- return XFS_ITEM_SUCCESS;
-}
-
-/* Ordered buffers do the dependency tracking here, so this does nothing. */
-STATIC void
-xfs_icreate_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_icreate_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
.iop_size = xfs_icreate_item_size,
.iop_format = xfs_icreate_item_format,
- .iop_pin = xfs_icreate_item_pin,
- .iop_unpin = xfs_icreate_item_unpin,
- .iop_push = xfs_icreate_item_push,
- .iop_unlock = xfs_icreate_item_unlock,
- .iop_committed = xfs_icreate_item_committed,
- .iop_committing = xfs_icreate_item_committing,
+ .iop_release = xfs_icreate_item_release,
};
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 71d216cf6f87..6467d5e1df2d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3,7 +3,6 @@
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/log2.h>
#include <linux/iversion.h>
#include "xfs.h"
@@ -16,10 +15,7 @@
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
-#include "xfs_attr_sf.h"
#include "xfs_attr.h"
#include "xfs_trans_space.h"
#include "xfs_trans.h"
@@ -32,7 +28,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_filestream.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
@@ -40,7 +35,6 @@
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
-#include "xfs_dir2_priv.h"
kmem_zone_t *xfs_inode_zone;
@@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass)
*/
static void
xfs_lock_inodes(
- xfs_inode_t **ips,
- int inodes,
- uint lock_mode)
+ struct xfs_inode **ips,
+ int inodes,
+ uint lock_mode)
{
- int attempts = 0, i, j, try_lock;
- xfs_log_item_t *lp;
+ int attempts = 0, i, j, try_lock;
+ struct xfs_log_item *lp;
/*
* Currently supports between 2 and 5 inodes with exclusive locking. We
@@ -485,7 +479,7 @@ again:
*/
if (!try_lock) {
for (j = (i - 1); j >= 0 && !try_lock; j--) {
- lp = (xfs_log_item_t *)ips[j]->i_itemp;
+ lp = &ips[j]->i_itemp->ili_item;
if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
try_lock++;
}
@@ -551,7 +545,7 @@ xfs_lock_two_inodes(
struct xfs_inode *temp;
uint mode_temp;
int attempts = 0;
- xfs_log_item_t *lp;
+ struct xfs_log_item *lp;
ASSERT(hweight32(ip0_mode) == 1);
ASSERT(hweight32(ip1_mode) == 1);
@@ -585,7 +579,7 @@ xfs_lock_two_inodes(
* the second lock. If we can't get it, we must release the first one
* and try again.
*/
- lp = (xfs_log_item_t *)ip0->i_itemp;
+ lp = &ip0->i_itemp->ili_item;
if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
xfs_iunlock(ip0, ip0_mode);
@@ -2537,13 +2531,14 @@ xfs_ifree_cluster(
xfs_inode_log_item_t *iip;
struct xfs_log_item *lip;
struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_ino_t inum;
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
- nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster;
+ nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
- for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) {
+ for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
/*
* The allocation bitmap tells us which inodes of the chunk were
* physically allocated. Skip the cluster if an inode falls into
@@ -2551,7 +2546,7 @@ xfs_ifree_cluster(
*/
ioffset = inum - xic->first_ino;
if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
- ASSERT(ioffset % mp->m_inodes_per_cluster == 0);
+ ASSERT(ioffset % igeo->inodes_per_cluster == 0);
continue;
}
@@ -2567,7 +2562,7 @@ xfs_ifree_cluster(
* to mark all the active inodes on the buffer stale.
*/
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * mp->m_blocks_per_cluster,
+ mp->m_bsize * igeo->blocks_per_cluster,
XBF_UNMAPPED);
if (!bp)
@@ -2614,7 +2609,7 @@ xfs_ifree_cluster(
* transaction stale above, which means there is no point in
* even trying to lock them.
*/
- for (i = 0; i < mp->m_inodes_per_cluster; i++) {
+ for (i = 0; i < igeo->inodes_per_cluster; i++) {
retry:
rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -3472,28 +3467,27 @@ xfs_iflush_cluster(
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
unsigned long first_index, mask;
- unsigned long inodes_per_cluster;
int cilist_size;
struct xfs_inode **cilist;
struct xfs_inode *cip;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
int nr_found;
int clcount = 0;
int i;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
- cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+ cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
if (!cilist)
goto out_put;
- mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
+ mask = ~(igeo->inodes_per_cluster - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
rcu_read_lock();
/* really need a gang lookup range call here */
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
- first_index, inodes_per_cluster);
+ first_index, igeo->inodes_per_cluster);
if (nr_found == 0)
goto out_free;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fa1c4fe2ffbf..c9a502eed204 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -5,6 +5,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -12,7 +13,6 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
@@ -565,7 +565,7 @@ out_unlock:
* Unlock the inode associated with the inode log item.
*/
STATIC void
-xfs_inode_item_unlock(
+xfs_inode_item_release(
struct xfs_log_item *lip)
{
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
@@ -621,23 +621,21 @@ xfs_inode_item_committed(
STATIC void
xfs_inode_item_committing(
struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+ xfs_lsn_t commit_lsn)
{
- INODE_ITEM(lip)->ili_last_lsn = lsn;
+ INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
+ return xfs_inode_item_release(lip);
}
-/*
- * This is the ops vector shared by all buf log items.
- */
static const struct xfs_item_ops xfs_inode_item_ops = {
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
.iop_unpin = xfs_inode_item_unpin,
- .iop_unlock = xfs_inode_item_unlock,
+ .iop_release = xfs_inode_item_release,
.iop_committed = xfs_inode_item_committed,
.iop_push = xfs_inode_item_push,
- .iop_committing = xfs_inode_item_committing,
+ .iop_committing = xfs_inode_item_committing,
.iop_error = xfs_inode_item_error
};
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 27081eba220c..07a60e74c39c 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -14,7 +14,7 @@ struct xfs_inode;
struct xfs_mount;
typedef struct xfs_inode_log_item {
- xfs_log_item_t ili_item; /* common portion */
+ struct xfs_log_item ili_item; /* common portion */
struct xfs_inode *ili_inode; /* inode ptr */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d7dfc13f30f5..6f7848cd5527 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -11,9 +11,8 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_attr.h"
@@ -25,7 +24,6 @@
#include "xfs_export.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_symlink.h"
#include "xfs_trans.h"
#include "xfs_acl.h"
#include "xfs_btree.h"
@@ -36,14 +34,8 @@
#include "xfs_ag.h"
#include "xfs_health.h"
-#include <linux/capability.h>
-#include <linux/cred.h>
-#include <linux/dcache.h>
#include <linux/mount.h>
#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
/*
* xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -721,16 +713,45 @@ out_unlock:
return error;
}
+/* Return 0 on success or positive error */
+int
+xfs_fsbulkstat_one_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
+{
+ struct xfs_bstat bs1;
+
+ xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
+ if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat));
+}
+
+int
+xfs_fsinumbers_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp)
+{
+ struct xfs_inogrp ig1;
+
+ xfs_inumbers_to_inogrp(&ig1, igrp);
+ if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp));
+}
+
STATIC int
-xfs_ioc_bulkstat(
+xfs_ioc_fsbulkstat(
xfs_mount_t *mp,
unsigned int cmd,
void __user *arg)
{
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
+ struct xfs_fsop_bulkreq bulkreq;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ .ocount = 0,
+ };
+ xfs_ino_t lastino;
int error;
/* done = 1 if there are more stats to get and if bulkstat */
@@ -742,41 +763,243 @@ xfs_ioc_bulkstat(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
+ if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
return -EFAULT;
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+ if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
return -EFAULT;
- if ((count = bulkreq.icount) <= 0)
+ if (bulkreq.icount <= 0)
return -EINVAL;
if (bulkreq.ubuffer == NULL)
return -EINVAL;
- if (cmd == XFS_IOC_FSINUMBERS)
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, xfs_inumbers_fmt);
- else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer,
- sizeof(xfs_bstat_t), NULL, &done);
- else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- &done);
+ breq.ubuffer = bulkreq.ubuffer;
+ breq.icount = bulkreq.icount;
+
+ /*
+ * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+ * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
+ * that *lastip contains either zero or the number of the last inode to
+ * be examined by the previous call and return results starting with
+ * the next inode after that. The new bulk request back end functions
+ * take the inode to start with, so we have to compute the startino
+ * parameter from lastino to maintain correct function. lastino == 0
+ * is a special case because it has traditionally meant "first inode
+ * in filesystem".
+ */
+ if (cmd == XFS_IOC_FSINUMBERS) {
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_inumbers(&breq, xfs_fsinumbers_fmt);
+ lastino = breq.startino - 1;
+ } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
+ breq.startino = lastino;
+ breq.icount = 1;
+ error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt);
+ } else { /* XFS_IOC_FSBULKSTAT */
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt);
+ lastino = breq.startino - 1;
+ }
if (error)
return error;
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -EFAULT;
+ if (bulkreq.lastip != NULL &&
+ copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+ return -EFAULT;
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -EFAULT;
+ if (bulkreq.ocount != NULL &&
+ copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/* Return 0 on success or positive error */
+static int
+xfs_bulkstat_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
+{
+ if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat));
+}
+
+/*
+ * Check the incoming bulk request @hdr from userspace and initialize the
+ * internal @breq bulk request appropriately. Returns 0 if the bulk request
+ * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
+ * negative error code.
+ */
+static int
+xfs_bulk_ireq_setup(
+ struct xfs_mount *mp,
+ struct xfs_bulk_ireq *hdr,
+ struct xfs_ibulk *breq,
+ void __user *ubuffer)
+{
+ if (hdr->icount == 0 ||
+ (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) ||
+ memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
+ return -EINVAL;
+
+ breq->startino = hdr->ino;
+ breq->ubuffer = ubuffer;
+ breq->icount = hdr->icount;
+ breq->ocount = 0;
+ breq->flags = 0;
+
+ /*
+ * The @ino parameter is a special value, so we must look it up here.
+ * We're not allowed to have IREQ_AGNO, and we only return one inode
+ * worth of data.
+ */
+ if (hdr->flags & XFS_BULK_IREQ_SPECIAL) {
+ if (hdr->flags & XFS_BULK_IREQ_AGNO)
+ return -EINVAL;
+
+ switch (hdr->ino) {
+ case XFS_BULK_IREQ_SPECIAL_ROOT:
+ hdr->ino = mp->m_sb.sb_rootino;
+ break;
+ default:
+ return -EINVAL;
+ }
+ breq->icount = 1;
}
+ /*
+ * The IREQ_AGNO flag means that we only want results from a given AG.
+ * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is
+ * beyond the specified AG then we return no results.
+ */
+ if (hdr->flags & XFS_BULK_IREQ_AGNO) {
+ if (hdr->agno >= mp->m_sb.sb_agcount)
+ return -EINVAL;
+
+ if (breq->startino == 0)
+ breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0);
+ else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
+ return -EINVAL;
+
+ breq->flags |= XFS_IBULK_SAME_AG;
+
+ /* Asking for an inode past the end of the AG? We're done! */
+ if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
+ return XFS_ITER_ABORT;
+ } else if (hdr->agno)
+ return -EINVAL;
+
+ /* Asking for an inode past the end of the FS? We're done! */
+ if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
+ return XFS_ITER_ABORT;
+
+ return 0;
+}
+
+/*
+ * Update the userspace bulk request @hdr to reflect the end state of the
+ * internal bulk request @breq.
+ */
+static void
+xfs_bulk_ireq_teardown(
+ struct xfs_bulk_ireq *hdr,
+ struct xfs_ibulk *breq)
+{
+ hdr->ino = breq->startino;
+ hdr->ocount = breq->ocount;
+}
+
+/* Handle the v5 bulkstat ioctl. */
+STATIC int
+xfs_ioc_bulkstat(
+ struct xfs_mount *mp,
+ unsigned int cmd,
+ struct xfs_bulkstat_req __user *arg)
+{
+ struct xfs_bulk_ireq hdr;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
+ if (error == XFS_ITER_ABORT)
+ goto out_teardown;
+ if (error < 0)
+ return error;
+
+ error = xfs_bulkstat(&breq, xfs_bulkstat_fmt);
+ if (error)
+ return error;
+
+out_teardown:
+ xfs_bulk_ireq_teardown(&hdr, &breq);
+ if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ return 0;
+}
+
+STATIC int
+xfs_inumbers_fmt(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp)
+{
+ if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers)))
+ return -EFAULT;
+ return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers));
+}
+
+/* Handle the v5 inumbers ioctl. */
+STATIC int
+xfs_ioc_inumbers(
+ struct xfs_mount *mp,
+ unsigned int cmd,
+ struct xfs_inumbers_req __user *arg)
+{
+ struct xfs_bulk_ireq hdr;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ };
+ int error;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
+ return -EFAULT;
+
+ error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
+ if (error == XFS_ITER_ABORT)
+ goto out_teardown;
+ if (error < 0)
+ return error;
+
+ error = xfs_inumbers(&breq, xfs_inumbers_fmt);
+ if (error)
+ return error;
+
+out_teardown:
+ xfs_bulk_ireq_teardown(&hdr, &breq);
+ if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+
return 0;
}
@@ -879,37 +1102,44 @@ xfs_di2lxflags(
return flags;
}
-STATIC int
-xfs_ioc_fsgetxattr(
- xfs_inode_t *ip,
- int attr,
- void __user *arg)
+static void
+xfs_fill_fsxattr(
+ struct xfs_inode *ip,
+ bool attr,
+ struct fsxattr *fa)
{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(struct fsxattr));
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- fa.fsx_xflags = xfs_ip2xflags(ip);
- fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_cowextsize = ip->i_d.di_cowextsize <<
+ simple_fill_fsxattr(fa, xfs_ip2xflags(ip));
+ fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
+ fa->fsx_cowextsize = ip->i_d.di_cowextsize <<
ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_projid = xfs_get_projid(ip);
+ fa->fsx_projid = xfs_get_projid(ip);
if (attr) {
if (ip->i_afp) {
if (ip->i_afp->if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = xfs_iext_count(ip->i_afp);
+ fa->fsx_nextents = xfs_iext_count(ip->i_afp);
else
- fa.fsx_nextents = ip->i_d.di_anextents;
+ fa->fsx_nextents = ip->i_d.di_anextents;
} else
- fa.fsx_nextents = 0;
+ fa->fsx_nextents = 0;
} else {
if (ip->i_df.if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = xfs_iext_count(&ip->i_df);
+ fa->fsx_nextents = xfs_iext_count(&ip->i_df);
else
- fa.fsx_nextents = ip->i_d.di_nextents;
+ fa->fsx_nextents = ip->i_d.di_nextents;
}
+}
+
+STATIC int
+xfs_ioc_fsgetxattr(
+ xfs_inode_t *ip,
+ int attr,
+ void __user *arg)
+{
+ struct fsxattr fa;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ xfs_fill_fsxattr(ip, attr, &fa);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (copy_to_user(arg, &fa, sizeof(fa)))
@@ -1035,15 +1265,6 @@ xfs_ioctl_setattr_xflags(
if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
return -EINVAL;
- /*
- * Can't modify an immutable/append-only file unless
- * we have appropriate permission.
- */
- if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) ||
- (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) &&
- !capable(CAP_LINUX_IMMUTABLE))
- return -EPERM;
-
/* diflags2 only valid for v3 inodes. */
di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
if (di_flags2 && ip->i_d.di_version < 3)
@@ -1202,39 +1423,31 @@ xfs_ioctl_setattr_check_extsize(
struct fsxattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
-
- if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode))
- return -EINVAL;
-
- if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
- !S_ISDIR(VFS_I(ip)->i_mode))
- return -EINVAL;
+ xfs_extlen_t size;
+ xfs_fsblock_t extsize_fsb;
if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
return -EINVAL;
- if (fa->fsx_extsize != 0) {
- xfs_extlen_t size;
- xfs_fsblock_t extsize_fsb;
-
- extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
- if (extsize_fsb > MAXEXTLEN)
- return -EINVAL;
+ if (fa->fsx_extsize == 0)
+ return 0;
- if (XFS_IS_REALTIME_INODE(ip) ||
- (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
- size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
- return -EINVAL;
- }
+ extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
+ if (extsize_fsb > MAXEXTLEN)
+ return -EINVAL;
- if (fa->fsx_extsize % size)
+ if (XFS_IS_REALTIME_INODE(ip) ||
+ (fa->fsx_xflags & FS_XFLAG_REALTIME)) {
+ size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog;
+ } else {
+ size = mp->m_sb.sb_blocksize;
+ if (extsize_fsb > mp->m_sb.sb_agblocks / 2)
return -EINVAL;
- } else
- fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT);
+ }
+
+ if (fa->fsx_extsize % size)
+ return -EINVAL;
return 0;
}
@@ -1260,6 +1473,8 @@ xfs_ioctl_setattr_check_cowextsize(
struct fsxattr *fa)
{
struct xfs_mount *mp = ip->i_mount;
+ xfs_extlen_t size;
+ xfs_fsblock_t cowextsize_fsb;
if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE))
return 0;
@@ -1268,25 +1483,19 @@ xfs_ioctl_setattr_check_cowextsize(
ip->i_d.di_version != 3)
return -EINVAL;
- if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode))
- return -EINVAL;
-
- if (fa->fsx_cowextsize != 0) {
- xfs_extlen_t size;
- xfs_fsblock_t cowextsize_fsb;
+ if (fa->fsx_cowextsize == 0)
+ return 0;
- cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
- if (cowextsize_fsb > MAXEXTLEN)
- return -EINVAL;
+ cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize);
+ if (cowextsize_fsb > MAXEXTLEN)
+ return -EINVAL;
- size = mp->m_sb.sb_blocksize;
- if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
- return -EINVAL;
+ size = mp->m_sb.sb_blocksize;
+ if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2)
+ return -EINVAL;
- if (fa->fsx_cowextsize % size)
- return -EINVAL;
- } else
- fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE;
+ if (fa->fsx_cowextsize % size)
+ return -EINVAL;
return 0;
}
@@ -1300,21 +1509,6 @@ xfs_ioctl_setattr_check_projid(
if (fa->fsx_projid > (uint16_t)-1 &&
!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
return -EINVAL;
-
- /*
- * Project Quota ID state is only allowed to change from within the init
- * namespace. Enforce that restriction only if we are trying to change
- * the quota ID state. Everything else is allowed in user namespaces.
- */
- if (current_user_ns() == &init_user_ns)
- return 0;
-
- if (xfs_get_projid(ip) != fa->fsx_projid)
- return -EINVAL;
- if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) !=
- (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT))
- return -EINVAL;
-
return 0;
}
@@ -1323,6 +1517,7 @@ xfs_ioctl_setattr(
xfs_inode_t *ip,
struct fsxattr *fa)
{
+ struct fsxattr old_fa;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
struct xfs_dquot *udqp = NULL;
@@ -1370,7 +1565,6 @@ xfs_ioctl_setattr(
goto error_free_dquots;
}
-
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
xfs_get_projid(ip) != fa->fsx_projid) {
code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
@@ -1379,6 +1573,11 @@ xfs_ioctl_setattr(
goto error_trans_cancel;
}
+ xfs_fill_fsxattr(ip, false, &old_fa);
+ code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa);
+ if (code)
+ goto error_trans_cancel;
+
code = xfs_ioctl_setattr_check_extsize(ip, fa);
if (code)
goto error_trans_cancel;
@@ -1489,6 +1688,7 @@ xfs_ioc_setxflags(
{
struct xfs_trans *tp;
struct fsxattr fa;
+ struct fsxattr old_fa;
unsigned int flags;
int join_flags = 0;
int error;
@@ -1524,6 +1724,13 @@ xfs_ioc_setxflags(
goto out_drop_write;
}
+ xfs_fill_fsxattr(ip, false, &old_fa);
+ error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa);
+ if (error) {
+ xfs_trans_cancel(tp);
+ goto out_drop_write;
+ }
+
error = xfs_ioctl_setattr_xflags(tp, ip, &fa);
if (error) {
xfs_trans_cancel(tp);
@@ -1942,7 +2149,12 @@ xfs_file_ioctl(
case XFS_IOC_FSBULKSTAT_SINGLE:
case XFS_IOC_FSBULKSTAT:
case XFS_IOC_FSINUMBERS:
+ return xfs_ioc_fsbulkstat(mp, cmd, arg);
+
+ case XFS_IOC_BULKSTAT:
return xfs_ioc_bulkstat(mp, cmd, arg);
+ case XFS_IOC_INUMBERS:
+ return xfs_ioc_inumbers(mp, cmd, arg);
case XFS_IOC_FSGEOMETRY_V1:
return xfs_ioc_fsgeometry(mp, arg, 3);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 4b17f67c888a..654c0bb1bcf8 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -77,4 +77,12 @@ xfs_set_dmattrs(
uint evmask,
uint16_t state);
+struct xfs_ibulk;
+struct xfs_bstat;
+struct xfs_inogrp;
+
+int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat);
+int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp);
+
#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 614fc6886d24..7fcf7569743f 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -3,23 +3,19 @@
* Copyright (c) 2004-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/compat.h>
-#include <linux/ioctl.h>
#include <linux/mount.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
#include <linux/fsmap.h>
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
-#include "xfs_error.h"
#include "xfs_fsops.h"
-#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_attr.h"
#include "xfs_ioctl.h"
@@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin(
}
STATIC int
-xfs_inumbers_fmt_compat(
- void __user *ubuffer,
- const struct xfs_inogrp *buffer,
- long count,
- long *written)
+xfs_fsinumbers_fmt_compat(
+ struct xfs_ibulk *breq,
+ const struct xfs_inumbers *ig)
{
- compat_xfs_inogrp_t __user *p32 = ubuffer;
- long i;
+ struct compat_xfs_inogrp __user *p32 = breq->ubuffer;
+ struct xfs_inogrp ig1;
+ struct xfs_inogrp *igrp = &ig1;
- for (i = 0; i < count; i++) {
- if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
- put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
- put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -EFAULT;
- }
- *written = count * sizeof(*p32);
- return 0;
+ xfs_inumbers_to_inogrp(&ig1, ig);
+
+ if (put_user(igrp->xi_startino, &p32->xi_startino) ||
+ put_user(igrp->xi_alloccount, &p32->xi_alloccount) ||
+ put_user(igrp->xi_allocmask, &p32->xi_allocmask))
+ return -EFAULT;
+
+ return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp));
}
#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
+#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt
#endif /* BROKEN_X86_ALIGNMENT */
STATIC int
@@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin(
return 0;
}
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
+/*
+ * struct xfs_bstat has differing alignment on intel, & bstime_t sizes
+ * everywhere
+ */
STATIC int
xfs_ioctl32_bstat_copyin(
- xfs_bstat_t *bstat,
- compat_xfs_bstat_t __user *bstat32)
+ struct xfs_bstat *bstat,
+ struct compat_xfs_bstat __user *bstat32)
{
if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
get_user(bstat->bs_mode, &bstat32->bs_mode) ||
@@ -171,16 +169,15 @@ xfs_bstime_store_compat(
/* Return 0 on success or positive error (to xfs_bulkstat()) */
STATIC int
-xfs_bulkstat_one_fmt_compat(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
+xfs_fsbulkstat_one_fmt_compat(
+ struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat)
{
- compat_xfs_bstat_t __user *p32 = ubuffer;
+ struct compat_xfs_bstat __user *p32 = breq->ubuffer;
+ struct xfs_bstat bs1;
+ struct xfs_bstat *buffer = &bs1;
- if (ubsize < sizeof(*p32))
- return -ENOMEM;
+ xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
if (put_user(buffer->bs_ino, &p32->bs_ino) ||
put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
return -EFAULT;
- if (ubused)
- *ubused = sizeof(*p32);
- return 0;
-}
-STATIC int
-xfs_bulkstat_one_compat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
-{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat,
- ubused, stat);
+ return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat));
}
/* copied from xfs_ioctl.c */
STATIC int
-xfs_compat_ioc_bulkstat(
+xfs_compat_ioc_fsbulkstat(
xfs_mount_t *mp,
unsigned int cmd,
- compat_xfs_fsop_bulkreq_t __user *p32)
+ struct compat_xfs_fsop_bulkreq __user *p32)
{
u32 addr;
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
+ struct xfs_fsop_bulkreq bulkreq;
+ struct xfs_ibulk breq = {
+ .mp = mp,
+ .ocount = 0,
+ };
+ xfs_ino_t lastino;
int error;
/*
@@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat(
* to userpace memory via bulkreq.ubuffer. Normally the compat
* functions and structure size are the correct ones to use ...
*/
- inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat;
- bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat;
- size_t bs_one_size = sizeof(struct compat_xfs_bstat);
+ inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat;
+ bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat;
#ifdef CONFIG_X86_X32
if (in_x32_syscall()) {
@@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat(
* the data written out in compat layout will not match what
* x32 userspace expects.
*/
- inumbers_func = xfs_inumbers_fmt;
- bs_one_func = xfs_bulkstat_one;
- bs_one_size = sizeof(struct xfs_bstat);
+ inumbers_func = xfs_fsinumbers_fmt;
+ bs_one_func = xfs_fsbulkstat_one_fmt;
}
#endif
@@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat(
return -EFAULT;
bulkreq.ocount = compat_ptr(addr);
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
+ if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
return -EFAULT;
- if ((count = bulkreq.icount) <= 0)
+ if (bulkreq.icount <= 0)
return -EINVAL;
if (bulkreq.ubuffer == NULL)
return -EINVAL;
+ breq.ubuffer = bulkreq.ubuffer;
+ breq.icount = bulkreq.icount;
+
+ /*
+ * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
+ * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
+ * that *lastip contains either zero or the number of the last inode to
+ * be examined by the previous call and return results starting with
+ * the next inode after that. The new bulk request back end functions
+ * take the inode to start with, so we have to compute the startino
+ * parameter from lastino to maintain correct function. lastino == 0
+ * is a special case because it has traditionally meant "first inode
+ * in filesystem".
+ */
if (cmd == XFS_IOC_FSINUMBERS_32) {
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, inumbers_func);
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_inumbers(&breq, inumbers_func);
+ lastino = breq.startino - 1;
} else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
- int res;
-
- error = bs_one_func(mp, inlast, bulkreq.ubuffer,
- bs_one_size, NULL, &res);
+ breq.startino = lastino;
+ breq.icount = 1;
+ error = xfs_bulkstat_one(&breq, bs_one_func);
+ lastino = breq.startino;
} else if (cmd == XFS_IOC_FSBULKSTAT_32) {
- error = xfs_bulkstat(mp, &inlast, &count,
- bs_one_func, bs_one_size,
- bulkreq.ubuffer, &done);
- } else
+ breq.startino = lastino ? lastino + 1 : 0;
+ error = xfs_bulkstat(&breq, bs_one_func);
+ lastino = breq.startino - 1;
+ } else {
error = -EINVAL;
+ }
if (error)
return error;
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -EFAULT;
+ if (bulkreq.lastip != NULL &&
+ copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
+ return -EFAULT;
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -EFAULT;
- }
+ if (bulkreq.ocount != NULL &&
+ copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
+ return -EFAULT;
return 0;
}
@@ -577,6 +574,8 @@ xfs_file_compat_ioctl(
case XFS_IOC_ERROR_CLEARALL:
case FS_IOC_GETFSMAP:
case XFS_IOC_SCRUB_METADATA:
+ case XFS_IOC_BULKSTAT:
+ case XFS_IOC_INUMBERS:
return xfs_file_ioctl(filp, cmd, p);
#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
/*
@@ -674,7 +673,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSBULKSTAT_32:
case XFS_IOC_FSBULKSTAT_SINGLE_32:
case XFS_IOC_FSINUMBERS_32:
- return xfs_compat_ioc_bulkstat(mp, cmd, arg);
+ return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
case XFS_IOC_FD_TO_HANDLE_32:
case XFS_IOC_PATH_TO_HANDLE_32:
case XFS_IOC_PATH_TO_FSHANDLE_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index d28fa824284a..7985344d3aa6 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime {
__s32 tv_nsec; /* and nanoseconds */
} compat_xfs_bstime_t;
-typedef struct compat_xfs_bstat {
+struct compat_xfs_bstat {
__u64 bs_ino; /* inode number */
__u16 bs_mode; /* type and mode */
__u16 bs_nlink; /* number of links */
@@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat {
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
-} __compat_packed compat_xfs_bstat_t;
+} __compat_packed;
-typedef struct compat_xfs_fsop_bulkreq {
+struct compat_xfs_fsop_bulkreq {
compat_uptr_t lastip; /* last inode # pointer */
__s32 icount; /* count of entries in buffer */
compat_uptr_t ubuffer; /* user buffer for inode desc. */
compat_uptr_t ocount; /* output count pointer */
-} compat_xfs_fsop_bulkreq_t;
+};
#define XFS_IOC_FSBULKSTAT_32 \
_IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
@@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext {
xfs_off_t sx_offset; /* offset into file */
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
- compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
+ struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */
} __compat_packed compat_xfs_swapext_t;
#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
@@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 {
#define XFS_IOC_FSGEOMETRY_V1_32 \
_IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-typedef struct compat_xfs_inogrp {
+struct compat_xfs_inogrp {
__u64 xi_startino; /* starting inode number */
__s32 xi_alloccount; /* # bits set in allocmask */
__u64 xi_allocmask; /* mask of allocated inodes */
-} __attribute__((packed)) compat_xfs_inogrp_t;
+} __attribute__((packed));
/* These growfs input structures have padding on the end, so must translate */
typedef struct compat_xfs_growfs_data {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 63d323916bba..3a4310d7cb59 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,7 +4,6 @@
* Copyright (c) 2016-2018 Christoph Hellwig.
* All Rights Reserved.
*/
-#include <linux/iomap.h>
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
@@ -12,7 +11,6 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
@@ -25,7 +23,6 @@
#include "xfs_inode_item.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
-#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
@@ -779,7 +776,7 @@ xfs_iomap_write_unwritten(
* complete here and might deadlock on the iolock.
*/
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
- XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 74047bd0c1ae..ff3c1fae5357 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -10,30 +10,20 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_acl.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_symlink.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
-#include "xfs_trans_space.h"
#include "xfs_iomap.h"
-#include "xfs_defer.h"
-#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/security.h>
-#include <linux/iomap.h>
-#include <linux/slab.h>
#include <linux/iversion.h>
/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 1e1a0af1dd34..a8a06bb78ea8 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -14,46 +14,66 @@
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_health.h"
/*
- * Return stat information for one inode.
- * Return 0 if ok, else errno.
+ * Bulk Stat
+ * =========
+ *
+ * Use the inode walking functions to fill out struct xfs_bulkstat for every
+ * allocated inode, then pass the stat information to some externally provided
+ * iteration function.
*/
-int
+
+struct xfs_bstat_chunk {
+ bulkstat_one_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+ struct xfs_bulkstat *buf;
+};
+
+/*
+ * Fill out the bulkstat info for a single inode and report it somewhere.
+ *
+ * bc->breq->lastino is effectively the inode cursor as we walk through the
+ * filesystem. Therefore, we update it any time we need to move the cursor
+ * forward, regardless of whether or not we're sending any bstat information
+ * back to userspace. If the inode is internal metadata or, has been freed
+ * out from under us, we just simply keep going.
+ *
+ * However, if any other type of error happens we want to stop right where we
+ * are so that userspace will call back with exact number of the bad inode and
+ * we can send back an error code.
+ *
+ * Note that if the formatter tells us there's no space left in the buffer we
+ * move the cursor forward and abort the walk.
+ */
+STATIC int
xfs_bulkstat_one_int(
- struct xfs_mount *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ struct xfs_bstat_chunk *bc)
{
struct xfs_icdinode *dic; /* dinode core info pointer */
struct xfs_inode *ip; /* incore inode pointer */
struct inode *inode;
- struct xfs_bstat *buf; /* return buffer */
- int error = 0; /* error value */
+ struct xfs_bulkstat *buf = bc->buf;
+ int error = -EINVAL;
- *stat = BULKSTAT_RV_NOTHING;
+ if (xfs_internal_inum(mp, ino))
+ goto out_advance;
- if (!buffer || xfs_internal_inum(mp, ino))
- return -EINVAL;
-
- buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
- if (!buf)
- return -ENOMEM;
-
- error = xfs_iget(mp, NULL, ino,
+ error = xfs_iget(mp, tp, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
+ if (error == -ENOENT || error == -EINVAL)
+ goto out_advance;
if (error)
- goto out_free;
+ goto out;
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
@@ -64,37 +84,35 @@ xfs_bulkstat_one_int(
/* xfs_iget returns the following without needing
* further change.
*/
- buf->bs_projid_lo = dic->di_projid_lo;
- buf->bs_projid_hi = dic->di_projid_hi;
+ buf->bs_projectid = xfs_get_projid(ip);
buf->bs_ino = ino;
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
buf->bs_nlink = inode->i_nlink;
- buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
- buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
- buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
- buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
- buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
- buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_atime = inode->i_atime.tv_sec;
+ buf->bs_atime_nsec = inode->i_atime.tv_nsec;
+ buf->bs_mtime = inode->i_mtime.tv_sec;
+ buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
+ buf->bs_ctime = inode->i_ctime.tv_sec;
+ buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_btime = dic->di_crtime.t_sec;
+ buf->bs_btime_nsec = dic->di_crtime.t_nsec;
buf->bs_gen = inode->i_generation;
buf->bs_mode = inode->i_mode;
buf->bs_xflags = xfs_ip2xflags(ip);
- buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
+ buf->bs_extsize_blks = dic->di_extsize;
buf->bs_extents = dic->di_nextents;
- memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
xfs_bulkstat_health(ip, buf);
- buf->bs_dmevmask = dic->di_dmevmask;
- buf->bs_dmstate = dic->di_dmstate;
buf->bs_aextents = dic->di_anextents;
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
if (dic->di_version == 3) {
if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
- buf->bs_cowextsize = dic->di_cowextsize <<
- mp->m_sb.sb_blocklog;
+ buf->bs_cowextsize_blks = dic->di_cowextsize;
}
switch (dic->di_format) {
@@ -118,385 +136,121 @@ xfs_bulkstat_one_int(
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_irele(ip);
- error = formatter(buffer, ubsize, ubused, buf);
- if (!error)
- *stat = BULKSTAT_RV_DIDONE;
+ error = bc->formatter(bc->breq, buf);
+ if (error == XFS_IBULK_ABORT)
+ goto out_advance;
+ if (error)
+ goto out;
- out_free:
- kmem_free(buf);
+out_advance:
+ /*
+ * Advance the cursor to the inode that comes after the one we just
+ * looked at. We want the caller to move along if the bulkstat
+ * information was copied successfully; if we tried to grab the inode
+ * but it's no longer allocated; or if it's internal metadata.
+ */
+ bc->breq->startino = ino + 1;
+out:
return error;
}
-/* Return 0 on success or positive error */
-STATIC int
-xfs_bulkstat_one_fmt(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
-{
- if (ubsize < sizeof(*buffer))
- return -ENOMEM;
- if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
- return -EFAULT;
- if (ubused)
- *ubused = sizeof(*buffer);
- return 0;
-}
-
+/* Bulkstat a single inode. */
int
xfs_bulkstat_one(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt, ubused, stat);
-}
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ int error;
-/*
- * Loop over all clusters in a chunk for a given incore inode allocation btree
- * record. Do a readahead if there are any allocated inodes in that cluster.
- */
-STATIC void
-xfs_bulkstat_ichunk_ra(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irec)
-{
- xfs_agblock_t agbno;
- struct blk_plug plug;
- int i; /* inode chunk index */
-
- agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
-
- blk_start_plug(&plug);
- for (i = 0; i < XFS_INODES_PER_CHUNK;
- i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
- if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
- ~irec->ir_free) {
- xfs_btree_reada_bufs(mp, agno, agbno,
- mp->m_blocks_per_cluster,
- &xfs_inode_buf_ops);
- }
- }
- blk_finish_plug(&plug);
-}
+ ASSERT(breq->icount == 1);
-/*
- * Lookup the inode chunk that the given inode lives in and then get the record
- * if we found the chunk. If the inode was not the last in the chunk and there
- * are some left allocated, update the data for the pointed-to record as well as
- * return the count of grabbed inodes.
- */
-STATIC int
-xfs_bulkstat_grab_ichunk(
- struct xfs_btree_cur *cur, /* btree cursor */
- xfs_agino_t agino, /* starting inode of chunk */
- int *icount,/* return # of inodes grabbed */
- struct xfs_inobt_rec_incore *irec) /* btree record */
-{
- int idx; /* index into inode chunk */
- int stat;
- int error = 0;
-
- /* Lookup the inode chunk that this inode lives in */
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
- if (error)
- return error;
- if (!stat) {
- *icount = 0;
- return error;
- }
+ bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+ KM_SLEEP | KM_MAYFAIL);
+ if (!bc.buf)
+ return -ENOMEM;
- /* Get the record, should always work */
- error = xfs_inobt_get_rec(cur, irec, &stat);
- if (error)
- return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
+ error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
- /* Check if the record contains the inode in request */
- if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
- *icount = 0;
- return 0;
- }
+ kmem_free(bc.buf);
- idx = agino - irec->ir_startino + 1;
- if (idx < XFS_INODES_PER_CHUNK &&
- (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) {
- int i;
-
- /* We got a right chunk with some left inodes allocated at it.
- * Grab the chunk record. Mark all the uninteresting inodes
- * free -- because they're before our start point.
- */
- for (i = 0; i < idx; i++) {
- if (XFS_INOBT_MASK(i) & ~irec->ir_free)
- irec->ir_freecount++;
- }
-
- irec->ir_free |= xfs_inobt_maskn(0, idx);
- *icount = irec->ir_count - irec->ir_freecount;
- }
+ /*
+ * If we reported one inode to userspace then we abort because we hit
+ * the end of the buffer. Don't leak that back to userspace.
+ */
+ if (error == XFS_IWALK_ABORT)
+ error = 0;
- return 0;
+ return error;
}
-#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
-
-struct xfs_bulkstat_agichunk {
- char __user **ac_ubuffer;/* pointer into user's buffer */
- int ac_ubleft; /* bytes left in user's buffer */
- int ac_ubelem; /* spaces used in user's buffer */
-};
-
-/*
- * Process inodes in chunk with a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- */
static int
-xfs_bulkstat_ag_ichunk(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- struct xfs_inobt_rec_incore *irbp,
- bulkstat_one_pf formatter,
- size_t statstruct_size,
- struct xfs_bulkstat_agichunk *acp,
- xfs_agino_t *last_agino)
+xfs_bulkstat_iwalk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ void *data)
{
- char __user **ubufp = acp->ac_ubuffer;
- int chunkidx;
- int error = 0;
- xfs_agino_t agino = irbp->ir_startino;
-
- for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
- chunkidx++, agino++) {
- int fmterror;
- int ubused;
-
- /* inode won't fit in buffer, we are done */
- if (acp->ac_ubleft < statstruct_size)
- break;
-
- /* Skip if this inode is free */
- if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
- continue;
-
- /* Get the inode and fill in a single buffer */
- ubused = statstruct_size;
- error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
- *ubufp, acp->ac_ubleft, &ubused, &fmterror);
-
- if (fmterror == BULKSTAT_RV_GIVEUP ||
- (error && error != -ENOENT && error != -EINVAL)) {
- acp->ac_ubleft = 0;
- ASSERT(error);
- break;
- }
-
- /* be careful not to leak error if at end of chunk */
- if (fmterror == BULKSTAT_RV_NOTHING || error) {
- error = 0;
- continue;
- }
-
- *ubufp += ubused;
- acp->ac_ubleft -= ubused;
- acp->ac_ubelem++;
- }
-
- /*
- * Post-update *last_agino. At this point, agino will always point one
- * inode past the last inode we processed successfully. Hence we
- * substract that inode when setting the *last_agino cursor so that we
- * return the correct cookie to userspace. On the next bulkstat call,
- * the inode under the lastino cookie will be skipped as we have already
- * processed it here.
- */
- *last_agino = agino - 1;
+ int error;
+ error = xfs_bulkstat_one_int(mp, tp, ino, data);
+ /* bulkstat just skips over missing inodes */
+ if (error == -ENOENT || error == -EINVAL)
+ return 0;
return error;
}
/*
- * Return stat information in bulk (by-inode) for the filesystem.
+ * Check the incoming lastino parameter.
+ *
+ * We allow any inode value that could map to physical space inside the
+ * filesystem because if there are no inodes there, bulkstat moves on to the
+ * next chunk. In other words, the magic agino value of zero takes us to the
+ * first chunk in the AG, and an agino value past the end of the AG takes us to
+ * the first chunk in the next AG.
+ *
+ * Therefore we can end early if the requested inode is beyond the end of the
+ * filesystem or doesn't map properly.
*/
-int /* error status */
-xfs_bulkstat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastinop, /* last inode returned */
- int *ubcountp, /* size of buffer/count returned */
- bulkstat_one_pf formatter, /* func that'd fill a single buf */
- size_t statstruct_size, /* sizeof struct filling */
- char __user *ubuffer, /* buffer with inode stats */
- int *done) /* 1 if there are more stats to get */
+static inline bool
+xfs_bulkstat_already_done(
+ struct xfs_mount *mp,
+ xfs_ino_t startino)
{
- xfs_buf_t *agbp; /* agi header buffer */
- xfs_agino_t agino; /* inode # in allocation group */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
- xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
- int nirbuf; /* size of irbuf */
- int ubcount; /* size of user's buffer */
- struct xfs_bulkstat_agichunk ac;
- int error = 0;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino);
- /*
- * Get the last inode value, see if there's nothing to do.
- */
- agno = XFS_INO_TO_AGNO(mp, *lastinop);
- agino = XFS_INO_TO_AGINO(mp, *lastinop);
- if (agno >= mp->m_sb.sb_agcount ||
- *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
- *done = 1;
- *ubcountp = 0;
- return 0;
- }
+ return agno >= mp->m_sb.sb_agcount ||
+ startino != XFS_AGINO_TO_INO(mp, agno, agino);
+}
- ubcount = *ubcountp; /* statstruct's */
- ac.ac_ubuffer = &ubuffer;
- ac.ac_ubleft = ubcount * statstruct_size; /* bytes */;
- ac.ac_ubelem = 0;
+/* Return stat information in bulk (by-inode) for the filesystem. */
+int
+xfs_bulkstat(
+ struct xfs_ibulk *breq,
+ bulkstat_one_fmt_pf formatter)
+{
+ struct xfs_bstat_chunk bc = {
+ .formatter = formatter,
+ .breq = breq,
+ };
+ int error;
- *ubcountp = 0;
- *done = 0;
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
- irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP);
- if (!irbuf)
+ bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
+ KM_SLEEP | KM_MAYFAIL);
+ if (!bc.buf)
return -ENOMEM;
- nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
- /*
- * Loop over the allocation groups, starting from the last
- * inode returned; 0 means start of the allocation group.
- */
- while (agno < mp->m_sb.sb_agcount) {
- struct xfs_inobt_rec_incore *irbp = irbuf;
- struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
- bool end_of_ag = false;
- int icount = 0;
- int stat;
-
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error)
- break;
- /*
- * Allocate and initialize a btree cursor for ialloc btree.
- */
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
- XFS_BTNUM_INO);
- if (agino > 0) {
- /*
- * In the middle of an allocation group, we need to get
- * the remainder of the chunk we're in.
- */
- struct xfs_inobt_rec_incore r;
-
- error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
- if (error)
- goto del_cursor;
- if (icount) {
- irbp->ir_startino = r.ir_startino;
- irbp->ir_holemask = r.ir_holemask;
- irbp->ir_count = r.ir_count;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- }
- /* Increment to the next record */
- error = xfs_btree_increment(cur, 0, &stat);
- } else {
- /* Start of ag. Lookup the first inode chunk */
- error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
- }
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
-
- /*
- * Loop through inode btree records in this ag,
- * until we run out of inodes or space in the buffer.
- */
- while (irbp < irbufend && icount < ubcount) {
- struct xfs_inobt_rec_incore r;
-
- error = xfs_inobt_get_rec(cur, &r, &stat);
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
-
- /*
- * If this chunk has any allocated inodes, save it.
- * Also start read-ahead now for this chunk.
- */
- if (r.ir_freecount < r.ir_count) {
- xfs_bulkstat_ichunk_ra(mp, agno, &r);
- irbp->ir_startino = r.ir_startino;
- irbp->ir_holemask = r.ir_holemask;
- irbp->ir_count = r.ir_count;
- irbp->ir_freecount = r.ir_freecount;
- irbp->ir_free = r.ir_free;
- irbp++;
- icount += r.ir_count - r.ir_freecount;
- }
- error = xfs_btree_increment(cur, 0, &stat);
- if (error || stat == 0) {
- end_of_ag = true;
- goto del_cursor;
- }
- cond_resched();
- }
-
- /*
- * Drop the btree buffers and the agi buffer as we can't hold any
- * of the locks these represent when calling iget. If there is a
- * pending error, then we are done.
- */
-del_cursor:
- xfs_btree_del_cursor(cur, error);
- xfs_buf_relse(agbp);
- if (error)
- break;
- /*
- * Now format all the good inodes into the user's buffer. The
- * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
- * for the next loop iteration.
- */
- irbufend = irbp;
- for (irbp = irbuf;
- irbp < irbufend && ac.ac_ubleft >= statstruct_size;
- irbp++) {
- error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
- formatter, statstruct_size, &ac,
- &agino);
- if (error)
- break;
-
- cond_resched();
- }
-
- /*
- * If we've run out of space or had a formatting error, we
- * are now done
- */
- if (ac.ac_ubleft < statstruct_size || error)
- break;
-
- if (end_of_ag) {
- agno++;
- agino = 0;
- }
- }
- /*
- * Done, we're either out of filesystem or space to put the data.
- */
- kmem_free(irbuf);
- *ubcountp = ac.ac_ubelem;
+ error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
+ xfs_bulkstat_iwalk, breq->icount, &bc);
+
+ kmem_free(bc.buf);
/*
* We found some inodes, so clear the error status and return them.
@@ -505,135 +259,136 @@ del_cursor:
* triggered again and propagated to userspace as there will be no
* formatted inodes in the buffer.
*/
- if (ac.ac_ubelem)
+ if (breq->ocount > 0)
error = 0;
- /*
- * If we ran out of filesystem, lastino will point off the end of
- * the filesystem so the next call will return immediately.
- */
- *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
- if (agno >= mp->m_sb.sb_agcount)
- *done = 1;
-
return error;
}
-int
-xfs_inumbers_fmt(
- void __user *ubuffer, /* buffer to write to */
- const struct xfs_inogrp *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written) /* # of bytes written */
+/* Convert bulkstat (v5) to bstat (v1). */
+void
+xfs_bulkstat_to_bstat(
+ struct xfs_mount *mp,
+ struct xfs_bstat *bs1,
+ const struct xfs_bulkstat *bstat)
{
- if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
- return -EFAULT;
- *written = count * sizeof(*buffer);
- return 0;
+ memset(bs1, 0, sizeof(struct xfs_bstat));
+ bs1->bs_ino = bstat->bs_ino;
+ bs1->bs_mode = bstat->bs_mode;
+ bs1->bs_nlink = bstat->bs_nlink;
+ bs1->bs_uid = bstat->bs_uid;
+ bs1->bs_gid = bstat->bs_gid;
+ bs1->bs_rdev = bstat->bs_rdev;
+ bs1->bs_blksize = bstat->bs_blksize;
+ bs1->bs_size = bstat->bs_size;
+ bs1->bs_atime.tv_sec = bstat->bs_atime;
+ bs1->bs_mtime.tv_sec = bstat->bs_mtime;
+ bs1->bs_ctime.tv_sec = bstat->bs_ctime;
+ bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
+ bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
+ bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
+ bs1->bs_blocks = bstat->bs_blocks;
+ bs1->bs_xflags = bstat->bs_xflags;
+ bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
+ bs1->bs_extents = bstat->bs_extents;
+ bs1->bs_gen = bstat->bs_gen;
+ bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
+ bs1->bs_forkoff = bstat->bs_forkoff;
+ bs1->bs_projid_hi = bstat->bs_projectid >> 16;
+ bs1->bs_sick = bstat->bs_sick;
+ bs1->bs_checked = bstat->bs_checked;
+ bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
+ bs1->bs_dmevmask = 0;
+ bs1->bs_dmstate = 0;
+ bs1->bs_aextents = bstat->bs_aextents;
+}
+
+struct xfs_inumbers_chunk {
+ inumbers_fmt_pf formatter;
+ struct xfs_ibulk *breq;
+};
+
+/*
+ * INUMBERS
+ * ========
+ * This is how we export inode btree records to userspace, so that XFS tools
+ * can figure out where inodes are allocated.
+ */
+
+/*
+ * Format the inode group structure and report it somewhere.
+ *
+ * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk
+ * through the filesystem so we move it forward unless there was a runtime
+ * error. If the formatter tells us the buffer is now full we also move the
+ * cursor forward and abort the walk.
+ */
+STATIC int
+xfs_inumbers_walk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ const struct xfs_inobt_rec_incore *irec,
+ void *data)
+{
+ struct xfs_inumbers inogrp = {
+ .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
+ .xi_alloccount = irec->ir_count - irec->ir_freecount,
+ .xi_allocmask = ~irec->ir_free,
+ .xi_version = XFS_INUMBERS_VERSION_V5,
+ };
+ struct xfs_inumbers_chunk *ic = data;
+ int error;
+
+ error = ic->formatter(ic->breq, &inogrp);
+ if (error && error != XFS_IBULK_ABORT)
+ return error;
+
+ ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
+ XFS_INODES_PER_CHUNK;
+ return error;
}
/*
* Return inode number table for the filesystem.
*/
-int /* error status */
+int
xfs_inumbers(
- struct xfs_mount *mp,/* mount point for filesystem */
- xfs_ino_t *lastino,/* last inode returned */
- int *count,/* size of buffer/count returned */
- void __user *ubuffer,/* buffer with inode descriptions */
+ struct xfs_ibulk *breq,
inumbers_fmt_pf formatter)
{
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino);
- xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino);
- struct xfs_btree_cur *cur = NULL;
- struct xfs_buf *agbp = NULL;
- struct xfs_inogrp *buffer;
- int bcount;
- int left = *count;
- int bufidx = 0;
+ struct xfs_inumbers_chunk ic = {
+ .formatter = formatter,
+ .breq = breq,
+ };
int error = 0;
- *count = 0;
- if (agno >= mp->m_sb.sb_agcount ||
- *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
- return error;
+ if (xfs_bulkstat_already_done(breq->mp, breq->startino))
+ return 0;
- bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer)));
- buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP);
- do {
- struct xfs_inobt_rec_incore r;
- int stat;
-
- if (!agbp) {
- error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
- if (error)
- break;
-
- cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
- XFS_BTNUM_INO);
- error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
- &stat);
- if (error)
- break;
- if (!stat)
- goto next_ag;
- }
-
- error = xfs_inobt_get_rec(cur, &r, &stat);
- if (error)
- break;
- if (!stat)
- goto next_ag;
-
- agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
- buffer[bufidx].xi_startino =
- XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
- buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
- buffer[bufidx].xi_allocmask = ~r.ir_free;
- if (++bufidx == bcount) {
- long written;
-
- error = formatter(ubuffer, buffer, bufidx, &written);
- if (error)
- break;
- ubuffer += written;
- *count += bufidx;
- bufidx = 0;
- }
- if (!--left)
- break;
-
- error = xfs_btree_increment(cur, 0, &stat);
- if (error)
- break;
- if (stat)
- continue;
-
-next_ag:
- xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
- cur = NULL;
- xfs_buf_relse(agbp);
- agbp = NULL;
- agino = 0;
- agno++;
- } while (agno < mp->m_sb.sb_agcount);
-
- if (!error) {
- if (bufidx) {
- long written;
-
- error = formatter(ubuffer, buffer, bufidx, &written);
- if (!error)
- *count += bufidx;
- }
- *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
- }
+ error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
+ xfs_inumbers_walk, breq->icount, &ic);
- kmem_free(buffer);
- if (cur)
- xfs_btree_del_cursor(cur, error);
- if (agbp)
- xfs_buf_relse(agbp);
+ /*
+ * We found some inode groups, so clear the error status and return
+ * them. The lastino pointer will point directly at the inode that
+ * triggered any error that occurred, so on the next call the error
+ * will be triggered again and propagated to userspace as there will be
+ * no formatted inode groups in the buffer.
+ */
+ if (breq->ocount > 0)
+ error = 0;
return error;
}
+
+/* Convert an inumbers (v5) struct to a inogrp (v1) struct. */
+void
+xfs_inumbers_to_inogrp(
+ struct xfs_inogrp *ig1,
+ const struct xfs_inumbers *ig)
+{
+ ig1->xi_startino = ig->xi_startino;
+ ig1->xi_alloccount = ig->xi_alloccount;
+ ig1->xi_allocmask = ig->xi_allocmask;
+}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 8a822285b671..e90c1fc5b981 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -5,83 +5,55 @@
#ifndef __XFS_ITABLE_H__
#define __XFS_ITABLE_H__
-/*
- * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat
- * structures (by the dmi library). This is a pointer to a formatter function
- * that will iget the inode and fill in the appropriate structure.
- * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c
- */
-typedef int (*bulkstat_one_pf)(struct xfs_mount *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- int *ubused,
- int *stat);
+/* In-memory representation of a userspace request for batch inode data. */
+struct xfs_ibulk {
+ struct xfs_mount *mp;
+ void __user *ubuffer; /* user output buffer */
+ xfs_ino_t startino; /* start with this inode */
+ unsigned int icount; /* number of elements in ubuffer */
+ unsigned int ocount; /* number of records returned */
+ unsigned int flags; /* see XFS_IBULK_FLAG_* */
+};
+
+/* Only iterate within the same AG as startino */
+#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG)
+
+/* Return value that means we want to abort the walk. */
+#define XFS_IBULK_ABORT (XFS_IWALK_ABORT)
/*
- * Values for stat return value.
+ * Advance the user buffer pointer by one record of the given size. If the
+ * buffer is now full, return the appropriate error code.
*/
-#define BULKSTAT_RV_NOTHING 0
-#define BULKSTAT_RV_DIDONE 1
-#define BULKSTAT_RV_GIVEUP 2
+static inline int
+xfs_ibulk_advance(
+ struct xfs_ibulk *breq,
+ size_t bytes)
+{
+ char __user *b = breq->ubuffer;
+
+ breq->ubuffer = b + bytes;
+ breq->ocount++;
+ return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0;
+}
/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
-int /* error status */
-xfs_bulkstat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *lastino, /* last inode returned */
- int *count, /* size of buffer/count returned */
- bulkstat_one_pf formatter, /* func that'd fill a single buf */
- size_t statstruct_size,/* sizeof struct that we're filling */
- char __user *ubuffer,/* buffer with inode stats */
- int *done); /* 1 if there are more stats to get */
-
-typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
- void __user *ubuffer, /* buffer to write to */
- int ubsize, /* remaining user buffer sz */
- int *ubused, /* bytes used by formatter */
- const xfs_bstat_t *buffer); /* buffer to read from */
-
-int
-xfs_bulkstat_one_int(
- xfs_mount_t *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- bulkstat_one_fmt_pf formatter,
- int *ubused,
- int *stat);
-int
-xfs_bulkstat_one(
- xfs_mount_t *mp,
- xfs_ino_t ino,
- void __user *buffer,
- int ubsize,
- int *ubused,
- int *stat);
+typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
+ const struct xfs_bulkstat *bstat);
-typedef int (*inumbers_fmt_pf)(
- void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written); /* # of bytes written */
+int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
+void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1,
+ const struct xfs_bulkstat *bstat);
-int
-xfs_inumbers_fmt(
- void __user *ubuffer, /* buffer to write to */
- const xfs_inogrp_t *buffer, /* buffer to read from */
- long count, /* # of elements to read */
- long *written); /* # of bytes written */
+typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq,
+ const struct xfs_inumbers *igrp);
-int /* error status */
-xfs_inumbers(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t *last, /* last inode returned */
- int *count, /* size of buffer/count returned */
- void __user *buffer, /* buffer with inode info */
- inumbers_fmt_pf formatter);
+int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter);
+void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1,
+ const struct xfs_inumbers *ig);
#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
new file mode 100644
index 000000000000..8c7d727149ea
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_iwalk.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_icache.h"
+#include "xfs_health.h"
+#include "xfs_trans.h"
+#include "xfs_pwork.h"
+
+/*
+ * Walking Inodes in the Filesystem
+ * ================================
+ *
+ * This iterator function walks a subset of filesystem inodes in increasing
+ * order from @startino until there are no more inodes. For each allocated
+ * inode it finds, it calls a walk function with the relevant inode number and
+ * a pointer to caller-provided data. The walk function can return the usual
+ * negative error code to stop the iteration; 0 to continue the iteration; or
+ * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the
+ * caller.
+ *
+ * Internally, we allow the walk function to do anything, which means that we
+ * cannot maintain the inobt cursor or our lock on the AGI buffer. We
+ * therefore cache the inobt records in kernel memory and only call the walk
+ * function when our memory buffer is full. @nr_recs is the number of records
+ * that we've cached, and @sz_recs is the size of our cache.
+ *
+ * It is the responsibility of the walk function to ensure it accesses
+ * allocated inodes, as the inobt records may be stale by the time they are
+ * acted upon.
+ */
+
+struct xfs_iwalk_ag {
+ /* parallel work control data; will be null if single threaded */
+ struct xfs_pwork pwork;
+
+ struct xfs_mount *mp;
+ struct xfs_trans *tp;
+
+ /* Where do we start the traversal? */
+ xfs_ino_t startino;
+
+ /* Array of inobt records we cache. */
+ struct xfs_inobt_rec_incore *recs;
+
+ /* Number of entries allocated for the @recs array. */
+ unsigned int sz_recs;
+
+ /* Number of entries in the @recs array that are in use. */
+ unsigned int nr_recs;
+
+ /* Inode walk function and data pointer. */
+ xfs_iwalk_fn iwalk_fn;
+ xfs_inobt_walk_fn inobt_walk_fn;
+ void *data;
+
+ /*
+ * Make it look like the inodes up to startino are free so that
+ * bulkstat can start its inode iteration at the correct place without
+ * needing to special case everywhere.
+ */
+ unsigned int trim_start:1;
+
+ /* Skip empty inobt records? */
+ unsigned int skip_empty:1;
+};
+
+/*
+ * Loop over all clusters in a chunk for a given incore inode allocation btree
+ * record. Do a readahead if there are any allocated inodes in that cluster.
+ */
+STATIC void
+xfs_iwalk_ichunk_ra(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec)
+{
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agblock_t agbno;
+ struct blk_plug plug;
+ int i; /* inode chunk index */
+
+ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
+
+ blk_start_plug(&plug);
+ for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
+ xfs_inofree_t imask;
+
+ imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
+ if (imask & ~irec->ir_free) {
+ xfs_btree_reada_bufs(mp, agno, agbno,
+ igeo->blocks_per_cluster,
+ &xfs_inode_buf_ops);
+ }
+ agbno += igeo->blocks_per_cluster;
+ }
+ blk_finish_plug(&plug);
+}
+
+/*
+ * Set the bits in @irec's free mask that correspond to the inodes before
+ * @agino so that we skip them. This is how we restart an inode walk that was
+ * interrupted in the middle of an inode record.
+ */
+STATIC void
+xfs_iwalk_adjust_start(
+ xfs_agino_t agino, /* starting inode of chunk */
+ struct xfs_inobt_rec_incore *irec) /* btree record */
+{
+ int idx; /* index into inode chunk */
+ int i;
+
+ idx = agino - irec->ir_startino;
+
+ /*
+ * We got a right chunk with some left inodes allocated at it. Grab
+ * the chunk record. Mark all the uninteresting inodes free because
+ * they're before our start point.
+ */
+ for (i = 0; i < idx; i++) {
+ if (XFS_INOBT_MASK(i) & ~irec->ir_free)
+ irec->ir_freecount++;
+ }
+
+ irec->ir_free |= xfs_inobt_maskn(0, idx);
+}
+
+/* Allocate memory for a walk. */
+STATIC int
+xfs_iwalk_alloc(
+ struct xfs_iwalk_ag *iwag)
+{
+ size_t size;
+
+ ASSERT(iwag->recs == NULL);
+ iwag->nr_recs = 0;
+
+ /* Allocate a prefetch buffer for inobt records. */
+ size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
+ iwag->recs = kmem_alloc(size, KM_MAYFAIL);
+ if (iwag->recs == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/* Free memory we allocated for a walk. */
+STATIC void
+xfs_iwalk_free(
+ struct xfs_iwalk_ag *iwag)
+{
+ kmem_free(iwag->recs);
+ iwag->recs = NULL;
+}
+
+/* For each inuse inode in each cached inobt record, call our function. */
+STATIC int
+xfs_iwalk_ag_recs(
+ struct xfs_iwalk_ag *iwag)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ xfs_ino_t ino;
+ unsigned int i, j;
+ xfs_agnumber_t agno;
+ int error;
+
+ agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+ for (i = 0; i < iwag->nr_recs; i++) {
+ struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
+
+ trace_xfs_iwalk_ag_rec(mp, agno, irec);
+
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ return 0;
+
+ if (iwag->inobt_walk_fn) {
+ error = iwag->inobt_walk_fn(mp, tp, agno, irec,
+ iwag->data);
+ if (error)
+ return error;
+ }
+
+ if (!iwag->iwalk_fn)
+ continue;
+
+ for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ return 0;
+
+ /* Skip if this inode is free */
+ if (XFS_INOBT_MASK(j) & irec->ir_free)
+ continue;
+
+ /* Otherwise call our function. */
+ ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j);
+ error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
+ if (error)
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+/* Delete cursor and let go of AGI. */
+static inline void
+xfs_iwalk_del_inobt(
+ struct xfs_trans *tp,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int error)
+{
+ if (*curpp) {
+ xfs_btree_del_cursor(*curpp, error);
+ *curpp = NULL;
+ }
+ if (*agi_bpp) {
+ xfs_trans_brelse(tp, *agi_bpp);
+ *agi_bpp = NULL;
+ }
+}
+
+/*
+ * Set ourselves up for walking inobt records starting from a given point in
+ * the filesystem.
+ *
+ * If caller passed in a nonzero start inode number, load the record from the
+ * inobt and make the record look like all the inodes before agino are free so
+ * that we skip them, and then move the cursor to the next inobt record. This
+ * is how we support starting an iwalk in the middle of an inode chunk.
+ *
+ * If the caller passed in a start number of zero, move the cursor to the first
+ * inobt record.
+ *
+ * The caller is responsible for cleaning up the cursor and buffer pointer
+ * regardless of the error status.
+ */
+STATIC int
+xfs_iwalk_ag_start(
+ struct xfs_iwalk_ag *iwag,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int *has_more)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_inobt_rec_incore *irec;
+ int error;
+
+ /* Set up a fresh cursor and empty the inobt cache. */
+ iwag->nr_recs = 0;
+ error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+ if (error)
+ return error;
+
+ /* Starting at the beginning of the AG? That's easy! */
+ if (agino == 0)
+ return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
+
+ /*
+ * Otherwise, we have to grab the inobt record where we left off, stuff
+ * the record into our cache, and then see if there are more records.
+ * We require a lookup cache of at least two elements so that the
+ * caller doesn't have to deal with tearing down the cursor to walk the
+ * records.
+ */
+ error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
+ if (error)
+ return error;
+
+ /*
+ * If the LE lookup at @agino yields no records, jump ahead to the
+ * inobt cursor increment to see if there are more records to process.
+ */
+ if (!*has_more)
+ goto out_advance;
+
+ /* Get the record, should always work */
+ irec = &iwag->recs[iwag->nr_recs];
+ error = xfs_inobt_get_rec(*curpp, irec, has_more);
+ if (error)
+ return error;
+ XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
+
+ /*
+ * If the LE lookup yielded an inobt record before the cursor position,
+ * skip it and see if there's another one after it.
+ */
+ if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ goto out_advance;
+
+ /*
+ * If agino fell in the middle of the inode record, make it look like
+ * the inodes up to agino are free so that we don't return them again.
+ */
+ if (iwag->trim_start)
+ xfs_iwalk_adjust_start(agino, irec);
+
+ /*
+ * The prefetch calculation is supposed to give us a large enough inobt
+ * record cache that grab_ichunk can stage a partial first record and
+ * the loop body can cache a record without having to check for cache
+ * space until after it reads an inobt record.
+ */
+ iwag->nr_recs++;
+ ASSERT(iwag->nr_recs < iwag->sz_recs);
+
+out_advance:
+ return xfs_btree_increment(*curpp, 0, has_more);
+}
+
+/*
+ * The inobt record cache is full, so preserve the inobt cursor state and
+ * run callbacks on the cached inobt records. When we're done, restore the
+ * cursor state to wherever the cursor would have been had the cache not been
+ * full (and therefore we could've just incremented the cursor) if *@has_more
+ * is true. On exit, *@has_more will indicate whether or not the caller should
+ * try for more inode records.
+ */
+STATIC int
+xfs_iwalk_run_callbacks(
+ struct xfs_iwalk_ag *iwag,
+ xfs_agnumber_t agno,
+ struct xfs_btree_cur **curpp,
+ struct xfs_buf **agi_bpp,
+ int *has_more)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_inobt_rec_incore *irec;
+ xfs_agino_t restart;
+ int error;
+
+ ASSERT(iwag->nr_recs > 0);
+
+ /* Delete cursor but remember the last record we cached... */
+ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
+ irec = &iwag->recs[iwag->nr_recs - 1];
+ restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+
+ error = xfs_iwalk_ag_recs(iwag);
+ if (error)
+ return error;
+
+ /* ...empty the cache... */
+ iwag->nr_recs = 0;
+
+ if (!has_more)
+ return 0;
+
+ /* ...and recreate the cursor just past where we left off. */
+ error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
+ if (error)
+ return error;
+
+ return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+}
+
+/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
+STATIC int
+xfs_iwalk_ag(
+ struct xfs_iwalk_ag *iwag)
+{
+ struct xfs_mount *mp = iwag->mp;
+ struct xfs_trans *tp = iwag->tp;
+ struct xfs_buf *agi_bp = NULL;
+ struct xfs_btree_cur *cur = NULL;
+ xfs_agnumber_t agno;
+ xfs_agino_t agino;
+ int has_more;
+ int error = 0;
+
+ /* Set up our cursor at the right place in the inode btree. */
+ agno = XFS_INO_TO_AGNO(mp, iwag->startino);
+ agino = XFS_INO_TO_AGINO(mp, iwag->startino);
+ error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);
+
+ while (!error && has_more) {
+ struct xfs_inobt_rec_incore *irec;
+
+ cond_resched();
+ if (xfs_pwork_want_abort(&iwag->pwork))
+ goto out;
+
+ /* Fetch the inobt record. */
+ irec = &iwag->recs[iwag->nr_recs];
+ error = xfs_inobt_get_rec(cur, irec, &has_more);
+ if (error || !has_more)
+ break;
+
+ /* No allocated inodes in this chunk; skip it. */
+ if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
+ error = xfs_btree_increment(cur, 0, &has_more);
+ if (error)
+ break;
+ continue;
+ }
+
+ /*
+ * Start readahead for this inode chunk in anticipation of
+ * walking the inodes.
+ */
+ if (iwag->iwalk_fn)
+ xfs_iwalk_ichunk_ra(mp, agno, irec);
+
+ /*
+ * If there's space in the buffer for more records, increment
+ * the btree cursor and grab more.
+ */
+ if (++iwag->nr_recs < iwag->sz_recs) {
+ error = xfs_btree_increment(cur, 0, &has_more);
+ if (error || !has_more)
+ break;
+ continue;
+ }
+
+ /*
+ * Otherwise, we need to save cursor state and run the callback
+ * function on the cached records. The run_callbacks function
+ * is supposed to return a cursor pointing to the record where
+ * we would be if we had been able to increment like above.
+ */
+ ASSERT(has_more);
+ error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
+ &has_more);
+ }
+
+ if (iwag->nr_recs == 0 || error)
+ goto out;
+
+ /* Walk the unprocessed records in the cache. */
+ error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);
+
+out:
+ xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
+ return error;
+}
+
+/*
+ * We experimentally determined that the reduction in ioctl call overhead
+ * diminishes when userspace asks for more than 2048 inodes, so we'll cap
+ * prefetch at this point.
+ */
+#define IWALK_MAX_INODE_PREFETCH (2048U)
+
+/*
+ * Given the number of inodes to prefetch, set the number of inobt records that
+ * we cache in memory, which controls the number of inodes we try to read
+ * ahead. Set the maximum if @inodes == 0.
+ */
+static inline unsigned int
+xfs_iwalk_prefetch(
+ unsigned int inodes)
+{
+ unsigned int inobt_records;
+
+ /*
+ * If the caller didn't tell us the number of inodes they wanted,
+ * assume the maximum prefetch possible for best performance.
+ * Otherwise, cap prefetch at that maximum so that we don't start an
+ * absurd amount of prefetch.
+ */
+ if (inodes == 0)
+ inodes = IWALK_MAX_INODE_PREFETCH;
+ inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);
+
+ /* Round the inode count up to a full chunk. */
+ inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
+
+ /*
+ * In order to convert the number of inodes to prefetch into an
+ * estimate of the number of inobt records to cache, we require a
+ * conversion factor that reflects our expectations of the average
+ * loading factor of an inode chunk. Based on data gathered, most
+ * (but not all) filesystems manage to keep the inode chunks totally
+ * full, so we'll underestimate slightly so that our readahead will
+ * still deliver the performance we want on aging filesystems:
+ *
+ * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
+ *
+ * The funny math is to avoid integer division.
+ */
+ inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);
+
+ /*
+ * Allocate enough space to prefetch at least two inobt records so that
+ * we can cache both the record where the iwalk started and the next
+ * record. This simplifies the AG inode walk loop setup code.
+ */
+ return max(inobt_records, 2U);
+}
+
+/*
+ * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
+ * will be called for each allocated inode, being passed the inode's number and
+ * @data. @max_prefetch controls how many inobt records' worth of inodes we
+ * try to readahead.
+ */
+int
+xfs_iwalk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records,
+ void *data)
+{
+ struct xfs_iwalk_ag iwag = {
+ .mp = mp,
+ .tp = tp,
+ .iwalk_fn = iwalk_fn,
+ .data = data,
+ .startino = startino,
+ .sz_recs = xfs_iwalk_prefetch(inode_records),
+ .trim_start = 1,
+ .skip_empty = 1,
+ .pwork = XFS_PWORK_SINGLE_THREADED,
+ };
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+ error = xfs_iwalk_alloc(&iwag);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ error = xfs_iwalk_ag(&iwag);
+ if (error)
+ break;
+ iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ xfs_iwalk_free(&iwag);
+ return error;
+}
+
+/* Run per-thread iwalk work. */
+static int
+xfs_iwalk_ag_work(
+ struct xfs_mount *mp,
+ struct xfs_pwork *pwork)
+{
+ struct xfs_iwalk_ag *iwag;
+ int error = 0;
+
+ iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
+ if (xfs_pwork_want_abort(pwork))
+ goto out;
+
+ error = xfs_iwalk_alloc(iwag);
+ if (error)
+ goto out;
+
+ error = xfs_iwalk_ag(iwag);
+ xfs_iwalk_free(iwag);
+out:
+ kmem_free(iwag);
+ return error;
+}
+
+/*
+ * Walk all the inodes in the filesystem using multiple threads to process each
+ * AG.
+ */
+int
+xfs_iwalk_threaded(
+ struct xfs_mount *mp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records,
+ bool polled,
+ void *data)
+{
+ struct xfs_pwork_ctl pctl;
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ unsigned int nr_threads;
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+ nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
+ error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
+ nr_threads);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ struct xfs_iwalk_ag *iwag;
+
+ if (xfs_pwork_ctl_want_abort(&pctl))
+ break;
+
+ iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP);
+ iwag->mp = mp;
+ iwag->iwalk_fn = iwalk_fn;
+ iwag->data = data;
+ iwag->startino = startino;
+ iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+ xfs_pwork_queue(&pctl, &iwag->pwork);
+ startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ if (polled)
+ xfs_pwork_poll(&pctl);
+ return xfs_pwork_destroy(&pctl);
+}
+
+/*
+ * Allow callers to cache up to a page's worth of inobt records. This reflects
+ * the existing inumbers prefetching behavior. Since the inobt walk does not
+ * itself do anything with the inobt records, we can set a fairly high limit
+ * here.
+ */
+#define MAX_INOBT_WALK_PREFETCH \
+ (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
+
+/*
+ * Given the number of records that the user wanted, set the number of inobt
+ * records that we buffer in memory. Set the maximum if @inobt_records == 0.
+ */
+static inline unsigned int
+xfs_inobt_walk_prefetch(
+ unsigned int inobt_records)
+{
+ /*
+ * If the caller didn't tell us the number of inobt records they
+ * wanted, assume the maximum prefetch possible for best performance.
+ */
+ if (inobt_records == 0)
+ inobt_records = MAX_INOBT_WALK_PREFETCH;
+
+ /*
+ * Allocate enough space to prefetch at least two inobt records so that
+ * we can cache both the record where the iwalk started and the next
+ * record. This simplifies the AG inode walk loop setup code.
+ */
+ inobt_records = max(inobt_records, 2U);
+
+ /*
+ * Cap prefetch at that maximum so that we don't use an absurd amount
+ * of memory.
+ */
+ return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
+}
+
+/*
+ * Walk all inode btree records in the filesystem starting from @startino. The
+ * @inobt_walk_fn will be called for each btree record, being passed the incore
+ * record and @data. @max_prefetch controls how many inobt records we try to
+ * cache ahead of time.
+ */
+int
+xfs_inobt_walk(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t startino,
+ unsigned int flags,
+ xfs_inobt_walk_fn inobt_walk_fn,
+ unsigned int inobt_records,
+ void *data)
+{
+ struct xfs_iwalk_ag iwag = {
+ .mp = mp,
+ .tp = tp,
+ .inobt_walk_fn = inobt_walk_fn,
+ .data = data,
+ .startino = startino,
+ .sz_recs = xfs_inobt_walk_prefetch(inobt_records),
+ .pwork = XFS_PWORK_SINGLE_THREADED,
+ };
+ xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ int error;
+
+ ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
+
+ error = xfs_iwalk_alloc(&iwag);
+ if (error)
+ return error;
+
+ for (; agno < mp->m_sb.sb_agcount; agno++) {
+ error = xfs_iwalk_ag(&iwag);
+ if (error)
+ break;
+ iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
+ if (flags & XFS_INOBT_WALK_SAME_AG)
+ break;
+ }
+
+ xfs_iwalk_free(&iwag);
+ return error;
+}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
new file mode 100644
index 000000000000..6c960e10ed4d
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_IWALK_H__
+#define __XFS_IWALK_H__
+
+/* Walk all inodes in the filesystem starting from @startino. */
+typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t ino, void *data);
+/* Return values for xfs_iwalk_fn. */
+#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE)
+#define XFS_IWALK_ABORT (XFS_ITER_ABORT)
+
+int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
+ unsigned int flags, xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records, void *data);
+int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
+ unsigned int flags, xfs_iwalk_fn iwalk_fn,
+ unsigned int inode_records, bool poll, void *data);
+
+/* Only iterate inodes within the same AG as @startino. */
+#define XFS_IWALK_SAME_AG (0x1)
+
+#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG)
+
+/* Walk all inode btree records in the filesystem starting from @startino. */
+typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ const struct xfs_inobt_rec_incore *irec,
+ void *data);
+/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
+#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT)
+
+int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_ino_t startino, unsigned int flags,
+ xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
+ void *data);
+
+/* Only iterate inobt records within the same AG as @startino. */
+#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
+
+#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
+
+#endif /* __XFS_IWALK_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index edbd5a210df2..ca15105681ca 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -110,8 +110,6 @@ typedef __u32 xfs_nlink_t;
#define current_restore_flags_nested(sp, f) \
(current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-#define spinlock_destroy(lock)
-
#define NBBY 8 /* number of bits per byte */
/*
@@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
return x;
}
+int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
+ char *data, unsigned int op);
+
#define ASSERT_ALWAYS(expr) \
(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2466b0f5b6c4..00e9f5c388d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -16,11 +16,7 @@
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode.h"
#include "xfs_trace.h"
-#include "xfs_fsops.h"
-#include "xfs_cksum.h"
#include "xfs_sysfs.h"
#include "xfs_sb.h"
#include "xfs_health.h"
@@ -45,21 +41,14 @@ STATIC int
xlog_space_left(
struct xlog *log,
atomic64_t *head);
-STATIC int
-xlog_sync(
- struct xlog *log,
- struct xlog_in_core *iclog);
STATIC void
xlog_dealloc_log(
struct xlog *log);
/* local state machine functions */
-STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void
-xlog_state_do_callback(
- struct xlog *log,
- int aborted,
- struct xlog_in_core *iclog);
+STATIC void xlog_state_done_syncing(
+ struct xlog_in_core *iclog,
+ bool aborted);
STATIC int
xlog_state_get_iclog_space(
struct xlog *log,
@@ -107,8 +96,7 @@ STATIC void
xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
- int count,
- bool syncing);
+ int count);
STATIC void
xlog_verify_tail_lsn(
struct xlog *log,
@@ -117,7 +105,7 @@ xlog_verify_tail_lsn(
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
-#define xlog_verify_iclog(a,b,c,d)
+#define xlog_verify_iclog(a,b,c)
#define xlog_verify_tail_lsn(a,b,c)
#endif
@@ -541,32 +529,6 @@ xfs_log_done(
return lsn;
}
-/*
- * Attaches a new iclog I/O completion callback routine during
- * transaction commit. If the log is in error state, a non-zero
- * return code is handed back and the caller is responsible for
- * executing the callback at an appropriate time.
- */
-int
-xfs_log_notify(
- struct xlog_in_core *iclog,
- xfs_log_callback_t *cb)
-{
- int abortflg;
-
- spin_lock(&iclog->ic_callback_lock);
- abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
- if (!abortflg) {
- ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
- (iclog->ic_state == XLOG_STATE_WANT_SYNC));
- cb->cb_next = NULL;
- *(iclog->ic_callback_tail) = cb;
- iclog->ic_callback_tail = &(cb->cb_next);
- }
- spin_unlock(&iclog->ic_callback_lock);
- return abortflg;
-}
-
int
xfs_log_release_iclog(
struct xfs_mount *mp,
@@ -807,16 +769,12 @@ xfs_log_mount_finish(
* The mount has failed. Cancel the recovery if it hasn't completed and destroy
* the log.
*/
-int
+void
xfs_log_mount_cancel(
struct xfs_mount *mp)
{
- int error;
-
- error = xlog_recover_cancel(mp->m_log);
+ xlog_recover_cancel(mp->m_log);
xfs_log_unmount(mp);
-
- return error;
}
/*
@@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
* Or, if we are doing a forced umount (typically because of IO errors).
*/
if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
- xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+ xfs_readonly_buftarg(log->l_targ)) {
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
return 0;
}
@@ -1244,53 +1202,49 @@ xlog_space_left(
}
-/*
- * Log function which is called when an io completes.
- *
- * The log manager needs its own routine, in order to control what
- * happens with the buffer after the write completes.
- */
static void
-xlog_iodone(xfs_buf_t *bp)
+xlog_ioend_work(
+ struct work_struct *work)
{
- struct xlog_in_core *iclog = bp->b_log_item;
- struct xlog *l = iclog->ic_log;
- int aborted = 0;
+ struct xlog_in_core *iclog =
+ container_of(work, struct xlog_in_core, ic_end_io_work);
+ struct xlog *log = iclog->ic_log;
+ bool aborted = false;
+ int error;
+
+ error = blk_status_to_errno(iclog->ic_bio.bi_status);
+#ifdef DEBUG
+ /* treat writes with injected CRC errors as failed */
+ if (iclog->ic_fail_crc)
+ error = -EIO;
+#endif
/*
- * Race to shutdown the filesystem if we see an error or the iclog is in
- * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
- * CRC errors into log recovery.
+ * Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) ||
- iclog->ic_state & XLOG_STATE_IOABORT) {
- if (iclog->ic_state & XLOG_STATE_IOABORT)
- iclog->ic_state &= ~XLOG_STATE_IOABORT;
-
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_buf_stale(bp);
- xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
+ if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
+ xfs_alert(log->l_mp, "log I/O error %d", error);
+ xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
/*
* This flag will be propagated to the trans-committed
* callback routines to let them know that the log-commit
* didn't succeed.
*/
- aborted = XFS_LI_ABORTED;
+ aborted = true;
} else if (iclog->ic_state & XLOG_STATE_IOERROR) {
- aborted = XFS_LI_ABORTED;
+ aborted = true;
}
- /* log I/O is always issued ASYNC */
- ASSERT(bp->b_flags & XBF_ASYNC);
xlog_state_done_syncing(iclog, aborted);
+ bio_uninit(&iclog->ic_bio);
/*
- * drop the buffer lock now that we are done. Nothing references
- * the buffer after this, so an unmount waiting on this lock can now
- * tear it down safely. As such, it is unsafe to reference the buffer
- * (bp) after the unlock as we could race with it being freed.
+ * Drop the lock to signal that we are done. Nothing references the
+ * iclog after this, so an unmount waiting on this lock can now tear it
+ * down safely. As such, it is unsafe to reference the iclog after the
+ * unlock as we could race with it being freed.
*/
- xfs_buf_unlock(bp);
+ up(&iclog->ic_sema);
}
/*
@@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp)
* If the filesystem blocksize is too large, we may need to choose a
* larger size since the directory code currently logs entire blocks.
*/
-
STATIC void
xlog_get_iclog_buffer_size(
struct xfs_mount *mp,
struct xlog *log)
{
- int size;
- int xhdrs;
-
if (mp->m_logbufs <= 0)
- log->l_iclog_bufs = XLOG_MAX_ICLOGS;
- else
- log->l_iclog_bufs = mp->m_logbufs;
+ mp->m_logbufs = XLOG_MAX_ICLOGS;
+ if (mp->m_logbsize <= 0)
+ mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
+
+ log->l_iclog_bufs = mp->m_logbufs;
+ log->l_iclog_size = mp->m_logbsize;
/*
- * Buffer size passed in from mount system call.
+ * # headers = size / 32k - one header holds cycles from 32k of data.
*/
- if (mp->m_logbsize > 0) {
- size = log->l_iclog_size = mp->m_logbsize;
- log->l_iclog_size_log = 0;
- while (size != 1) {
- log->l_iclog_size_log++;
- size >>= 1;
- }
-
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- /* # headers = size / 32k
- * one header holds cycles from 32k of data
- */
-
- xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
- if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
- xhdrs++;
- log->l_iclog_hsize = xhdrs << BBSHIFT;
- log->l_iclog_heads = xhdrs;
- } else {
- ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
- log->l_iclog_hsize = BBSIZE;
- log->l_iclog_heads = 1;
- }
- goto done;
- }
-
- /* All machines use 32kB buffers by default. */
- log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
- log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
-
- /* the default log size is 16k or 32k which is one header sector */
- log->l_iclog_hsize = BBSIZE;
- log->l_iclog_heads = 1;
-
-done:
- /* are we being asked to make the sizes selected above visible? */
- if (mp->m_logbufs == 0)
- mp->m_logbufs = log->l_iclog_bufs;
- if (mp->m_logbsize == 0)
- mp->m_logbsize = log->l_iclog_size;
-} /* xlog_get_iclog_buffer_size */
-
+ log->l_iclog_heads =
+ DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
+ log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
+}
void
xfs_log_work_queue(
@@ -1422,7 +1337,6 @@ xlog_alloc_log(
xlog_rec_header_t *head;
xlog_in_core_t **iclogp;
xlog_in_core_t *iclog, *prev_iclog=NULL;
- xfs_buf_t *bp;
int i;
int error = -ENOMEM;
uint log2_size = 0;
@@ -1480,30 +1394,6 @@ xlog_alloc_log(
xlog_get_iclog_buffer_size(mp, log);
- /*
- * Use a NULL block for the extra log buffer used during splits so that
- * it will trigger errors if we ever try to do IO on it without first
- * having set it up properly.
- */
- error = -ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
- BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
- if (!bp)
- goto out_free_log;
-
- /*
- * The iclogbuf buffer locks are held over IO but we are not going to do
- * IO yet. Hence unlock the buffer so that the log IO path can grab it
- * when appropriately.
- */
- ASSERT(xfs_buf_islocked(bp));
- xfs_buf_unlock(bp);
-
- /* use high priority wq for log I/O completion */
- bp->b_ioend_wq = mp->m_log_workqueue;
- bp->b_iodone = xlog_iodone;
- log->l_xbuf = bp;
-
spin_lock_init(&log->l_icloglock);
init_waitqueue_head(&log->l_flush_wait);
@@ -1516,29 +1406,22 @@ xlog_alloc_log(
* xlog_in_core_t in xfs_log_priv.h for details.
*/
ASSERT(log->l_iclog_size >= 4096);
- for (i=0; i < log->l_iclog_bufs; i++) {
- *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL);
- if (!*iclogp)
+ for (i = 0; i < log->l_iclog_bufs; i++) {
+ size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
+ sizeof(struct bio_vec);
+
+ iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
+ if (!iclog)
goto out_free_iclog;
- iclog = *iclogp;
+ *iclogp = iclog;
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- bp = xfs_buf_get_uncached(mp->m_logdev_targp,
- BTOBB(log->l_iclog_size),
- XBF_NO_IOACCT);
- if (!bp)
+ iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
+ KM_MAYFAIL);
+ if (!iclog->ic_data)
goto out_free_iclog;
-
- ASSERT(xfs_buf_islocked(bp));
- xfs_buf_unlock(bp);
-
- /* use high priority wq for log I/O completion */
- bp->b_ioend_wq = mp->m_log_workqueue;
- bp->b_iodone = xlog_iodone;
- iclog->ic_bp = bp;
- iclog->ic_data = bp->b_addr;
#ifdef DEBUG
log->l_iclog_bak[i] = &iclog->ic_header;
#endif
@@ -1552,36 +1435,43 @@ xlog_alloc_log(
head->h_fmt = cpu_to_be32(XLOG_FMT);
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
- iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
+ iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_log = log;
atomic_set(&iclog->ic_refcnt, 0);
spin_lock_init(&iclog->ic_callback_lock);
- iclog->ic_callback_tail = &(iclog->ic_callback);
+ INIT_LIST_HEAD(&iclog->ic_callbacks);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
+ INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
+ sema_init(&iclog->ic_sema, 1);
iclogp = &iclog->ic_next;
}
*iclogp = log->l_iclog; /* complete ring */
log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
+ log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
+ mp->m_fsname);
+ if (!log->l_ioend_workqueue)
+ goto out_free_iclog;
+
error = xlog_cil_init(log);
if (error)
- goto out_free_iclog;
+ goto out_destroy_workqueue;
return log;
+out_destroy_workqueue:
+ destroy_workqueue(log->l_ioend_workqueue);
out_free_iclog:
for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
prev_iclog = iclog->ic_next;
- if (iclog->ic_bp)
- xfs_buf_free(iclog->ic_bp);
+ kmem_free(iclog->ic_data);
kmem_free(iclog);
}
- spinlock_destroy(&log->l_icloglock);
- xfs_buf_free(log->l_xbuf);
out_free_log:
kmem_free(log);
out:
@@ -1766,42 +1656,155 @@ xlog_cksum(
return xfs_end_cksum(crc);
}
-/*
- * The bdstrat callback function for log bufs. This gives us a central
- * place to trap bufs in case we get hit by a log I/O error and need to
- * shutdown. Actually, in practice, even when we didn't get a log error,
- * we transition the iclogs to IOERROR state *after* flushing all existing
- * iclogs to disk. This is because we don't want anymore new transactions to be
- * started or completed afterwards.
- *
- * We lock the iclogbufs here so that we can serialise against IO completion
- * during unmount. We might be processing a shutdown triggered during unmount,
- * and that can occur asynchronously to the unmount thread, and hence we need to
- * ensure that completes before tearing down the iclogbufs. Hence we need to
- * hold the buffer lock across the log IO to acheive that.
- */
-STATIC int
-xlog_bdstrat(
- struct xfs_buf *bp)
+static void
+xlog_bio_end_io(
+ struct bio *bio)
{
- struct xlog_in_core *iclog = bp->b_log_item;
+ struct xlog_in_core *iclog = bio->bi_private;
- xfs_buf_lock(bp);
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- xfs_buf_ioerror(bp, -EIO);
- xfs_buf_stale(bp);
- xfs_buf_ioend(bp);
+ queue_work(iclog->ic_log->l_ioend_workqueue,
+ &iclog->ic_end_io_work);
+}
+
+static void
+xlog_map_iclog_data(
+ struct bio *bio,
+ void *data,
+ size_t count)
+{
+ do {
+ struct page *page = kmem_to_page(data);
+ unsigned int off = offset_in_page(data);
+ size_t len = min_t(size_t, count, PAGE_SIZE - off);
+
+ WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
+
+ data += len;
+ count -= len;
+ } while (count);
+}
+
+STATIC void
+xlog_write_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ uint64_t bno,
+ unsigned int count,
+ bool need_flush)
+{
+ ASSERT(bno < log->l_logBBsize);
+
+ /*
+ * We lock the iclogbufs here so that we can serialise against I/O
+ * completion during unmount. We might be processing a shutdown
+ * triggered during unmount, and that can occur asynchronously to the
+ * unmount thread, and hence we need to ensure that completes before
+ * tearing down the iclogbufs. Hence we need to hold the buffer lock
+ * across the log IO to archieve that.
+ */
+ down(&iclog->ic_sema);
+ if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
- * doing it here. Similarly, IO completion will unlock the
- * buffer, so we don't do it here.
+ * doing it here. We kick of the state machine and unlock
+ * the buffer manually, the code needs to be kept in sync
+ * with the I/O completion path.
*/
- return 0;
+ xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
+ up(&iclog->ic_sema);
+ return;
}
- xfs_buf_submit(bp);
- return 0;
+ iclog->ic_io_size = count;
+
+ bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
+ bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
+ iclog->ic_bio.bi_end_io = xlog_bio_end_io;
+ iclog->ic_bio.bi_private = iclog;
+ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
+ if (need_flush)
+ iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
+
+ xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+ if (is_vmalloc_addr(iclog->ic_data))
+ flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+
+ /*
+ * If this log buffer would straddle the end of the log we will have
+ * to split it up into two bios, so that we can continue at the start.
+ */
+ if (bno + BTOBB(count) > log->l_logBBsize) {
+ struct bio *split;
+
+ split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
+ GFP_NOIO, &fs_bio_set);
+ bio_chain(split, &iclog->ic_bio);
+ submit_bio(split);
+
+ /* restart at logical offset zero for the remainder */
+ iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
+ }
+
+ submit_bio(&iclog->ic_bio);
+}
+
+/*
+ * We need to bump cycle number for the part of the iclog that is
+ * written to the start of the log. Watch out for the header magic
+ * number case, though.
+ */
+static void
+xlog_split_iclog(
+ struct xlog *log,
+ void *data,
+ uint64_t bno,
+ unsigned int count)
+{
+ unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
+ unsigned int i;
+
+ for (i = split_offset; i < count; i += BBSIZE) {
+ uint32_t cycle = get_unaligned_be32(data + i);
+
+ if (++cycle == XLOG_HEADER_MAGIC_NUM)
+ cycle++;
+ put_unaligned_be32(cycle, data + i);
+ }
+}
+
+static int
+xlog_calc_iclog_size(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ uint32_t *roundoff)
+{
+ uint32_t count_init, count;
+ bool use_lsunit;
+
+ use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
+ log->l_mp->m_sb.sb_logsunit > 1;
+
+ /* Add for LR header */
+ count_init = log->l_iclog_hsize + iclog->ic_offset;
+
+ /* Round out the log write size */
+ if (use_lsunit) {
+ /* we have a v2 stripe unit to use */
+ count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
+ } else {
+ count = BBTOB(BTOBB(count_init));
+ }
+
+ ASSERT(count >= count_init);
+ *roundoff = count - count_init;
+
+ if (use_lsunit)
+ ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
+ else
+ ASSERT(*roundoff < BBTOB(1));
+ return count;
}
/*
@@ -1824,46 +1827,23 @@ xlog_bdstrat(
* log will require grabbing the lock though.
*
* The entire log manager uses a logical block numbering scheme. Only
- * log_sync (and then only bwrite()) know about the fact that the log may
- * not start with block zero on a given device. The log block start offset
- * is added immediately before calling bwrite().
+ * xlog_write_iclog knows about the fact that the log may not start with
+ * block zero on a given device.
*/
-
-STATIC int
+STATIC void
xlog_sync(
struct xlog *log,
struct xlog_in_core *iclog)
{
- xfs_buf_t *bp;
- int i;
- uint count; /* byte count of bwrite */
- uint count_init; /* initial count before roundup */
- int roundoff; /* roundoff to BB or stripe */
- int split = 0; /* split write into two regions */
- int error;
- int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
- int size;
+ unsigned int count; /* byte count of bwrite */
+ unsigned int roundoff; /* roundoff to BB or stripe */
+ uint64_t bno;
+ unsigned int size;
+ bool need_flush = true, split = false;
- XFS_STATS_INC(log->l_mp, xs_log_writes);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
- /* Add for LR header */
- count_init = log->l_iclog_hsize + iclog->ic_offset;
-
- /* Round out the log write size */
- if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
- /* we have a v2 stripe unit to use */
- count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
- } else {
- count = BBTOB(BTOBB(count_init));
- }
- roundoff = count - count_init;
- ASSERT(roundoff >= 0);
- ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 &&
- roundoff < log->l_mp->m_sb.sb_logsunit)
- ||
- (log->l_mp->m_sb.sb_logsunit <= 1 &&
- roundoff < BBTOB(1)));
+ count = xlog_calc_iclog_size(log, iclog, &roundoff);
/* move grant heads by roundoff in sync */
xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
@@ -1874,41 +1854,19 @@ xlog_sync(
/* real byte length */
size = iclog->ic_offset;
- if (v2)
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
size += roundoff;
iclog->ic_header.h_len = cpu_to_be32(size);
- bp = iclog->ic_bp;
- XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
-
+ XFS_STATS_INC(log->l_mp, xs_log_writes);
XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
- /* Do we need to split this write into 2 parts? */
- if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
- char *dptr;
-
- split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
- count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
- iclog->ic_bwritecnt = 2;
+ bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
- /*
- * Bump the cycle numbers at the start of each block in the
- * part of the iclog that ends up in the buffer that gets
- * written to the start of the log.
- *
- * Watch out for the header magic number case, though.
- */
- dptr = (char *)&iclog->ic_header + count;
- for (i = 0; i < split; i += BBSIZE) {
- uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
- if (++cycle == XLOG_HEADER_MAGIC_NUM)
- cycle++;
- *(__be32 *)dptr = cpu_to_be32(cycle);
-
- dptr += BBSIZE;
- }
- } else {
- iclog->ic_bwritecnt = 1;
+ /* Do we need to split this write into 2 parts? */
+ if (bno + BTOBB(count) > log->l_logBBsize) {
+ xlog_split_iclog(log, &iclog->ic_header, bno, count);
+ split = true;
}
/* calculcate the checksum */
@@ -1921,18 +1879,15 @@ xlog_sync(
* write on I/O completion and shutdown the fs. The subsequent mount
* detects the bad CRC and attempts to recover.
*/
+#ifdef DEBUG
if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
- iclog->ic_state |= XLOG_STATE_IOABORT;
+ iclog->ic_fail_crc = true;
xfs_warn(log->l_mp,
"Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
be64_to_cpu(iclog->ic_header.h_lsn));
}
-
- bp->b_io_length = BTOBB(count);
- bp->b_log_item = iclog;
- bp->b_flags &= ~XBF_FLUSH;
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
+#endif
/*
* Flush the data device before flushing the log to make sure all meta
@@ -1942,50 +1897,14 @@ xlog_sync(
* synchronously here; for an internal log we can simply use the block
* layer state machine for preflushes.
*/
- if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
+ if (log->l_targ != log->l_mp->m_ddev_targp || split) {
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
- else
- bp->b_flags |= XBF_FLUSH;
-
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
- xlog_verify_iclog(log, iclog, count, true);
-
- /* account for log which doesn't start at block #0 */
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
-
- /*
- * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
- * is shutting down.
- */
- error = xlog_bdstrat(bp);
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_sync");
- return error;
+ need_flush = false;
}
- if (split) {
- bp = iclog->ic_log->l_xbuf;
- XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
- xfs_buf_associate_memory(bp,
- (char *)&iclog->ic_header + count, split);
- bp->b_log_item = iclog;
- bp->b_flags &= ~XBF_FLUSH;
- bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
-
- ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
- ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
-
- /* account for internal log which doesn't start at block #0 */
- XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
- error = xlog_bdstrat(bp);
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
- return error;
- }
- }
- return 0;
-} /* xlog_sync */
+
+ xlog_verify_iclog(log, iclog, count);
+ xlog_write_iclog(log, iclog, bno, count, need_flush);
+}
/*
* Deallocate a log structure
@@ -2005,31 +1924,21 @@ xlog_dealloc_log(
*/
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
- xfs_buf_lock(iclog->ic_bp);
- xfs_buf_unlock(iclog->ic_bp);
+ down(&iclog->ic_sema);
+ up(&iclog->ic_sema);
iclog = iclog->ic_next;
}
- /*
- * Always need to ensure that the extra buffer does not point to memory
- * owned by another log buffer before we free it. Also, cycle the lock
- * first to ensure we've completed IO on it.
- */
- xfs_buf_lock(log->l_xbuf);
- xfs_buf_unlock(log->l_xbuf);
- xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
- xfs_buf_free(log->l_xbuf);
-
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
- xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
+ kmem_free(iclog->ic_data);
kmem_free(iclog);
iclog = next_iclog;
}
- spinlock_destroy(&log->l_icloglock);
log->l_mp->m_log = NULL;
+ destroy_workqueue(log->l_ioend_workqueue);
kmem_free(log);
} /* xlog_dealloc_log */
@@ -2610,7 +2519,7 @@ xlog_state_clean_log(
if (iclog->ic_state == XLOG_STATE_DIRTY) {
iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_offset = 0;
- ASSERT(iclog->ic_callback == NULL);
+ ASSERT(list_empty_careful(&iclog->ic_callbacks));
/*
* If the number of ops in this iclog indicate it just
* contains the dummy transaction, we can
@@ -2680,37 +2589,32 @@ xlog_state_clean_log(
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
- struct xlog *log)
+ struct xlog *log)
{
- xlog_in_core_t *lsn_log;
- xfs_lsn_t lowest_lsn, lsn;
+ struct xlog_in_core *iclog = log->l_iclog;
+ xfs_lsn_t lowest_lsn = 0, lsn;
- lsn_log = log->l_iclog;
- lowest_lsn = 0;
do {
- if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) {
- lsn = be64_to_cpu(lsn_log->ic_header.h_lsn);
- if ((lsn && !lowest_lsn) ||
- (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) {
+ if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ continue;
+
+ lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
lowest_lsn = lsn;
- }
- }
- lsn_log = lsn_log->ic_next;
- } while (lsn_log != log->l_iclog);
+ } while ((iclog = iclog->ic_next) != log->l_iclog);
+
return lowest_lsn;
}
-
STATIC void
xlog_state_do_callback(
struct xlog *log,
- int aborted,
+ bool aborted,
struct xlog_in_core *ciclog)
{
xlog_in_core_t *iclog;
xlog_in_core_t *first_iclog; /* used to know when we've
* processed all iclogs once */
- xfs_log_callback_t *cb, *cb_next;
int flushcnt = 0;
xfs_lsn_t lowest_lsn;
int ioerrors; /* counter: iclogs with errors */
@@ -2821,7 +2725,7 @@ xlog_state_do_callback(
*/
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- if (iclog->ic_callback)
+ if (!list_empty_careful(&iclog->ic_callbacks))
atomic64_set(&log->l_last_sync_lsn,
be64_to_cpu(iclog->ic_header.h_lsn));
@@ -2838,26 +2742,20 @@ xlog_state_do_callback(
* callbacks being added.
*/
spin_lock(&iclog->ic_callback_lock);
- cb = iclog->ic_callback;
- while (cb) {
- iclog->ic_callback_tail = &(iclog->ic_callback);
- iclog->ic_callback = NULL;
- spin_unlock(&iclog->ic_callback_lock);
+ while (!list_empty(&iclog->ic_callbacks)) {
+ LIST_HEAD(tmp);
- /* perform callbacks in the order given */
- for (; cb; cb = cb_next) {
- cb_next = cb->cb_next;
- cb->cb_func(cb->cb_arg, aborted);
- }
+ list_splice_init(&iclog->ic_callbacks, &tmp);
+
+ spin_unlock(&iclog->ic_callback_lock);
+ xlog_cil_process_committed(&tmp, aborted);
spin_lock(&iclog->ic_callback_lock);
- cb = iclog->ic_callback;
}
loopdidcallbacks++;
funcdidcallbacks++;
spin_lock(&log->l_icloglock);
- ASSERT(iclog->ic_callback == NULL);
spin_unlock(&iclog->ic_callback_lock);
if (!(iclog->ic_state & XLOG_STATE_IOERROR))
iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2943,18 +2841,16 @@ xlog_state_do_callback(
*/
STATIC void
xlog_state_done_syncing(
- xlog_in_core_t *iclog,
- int aborted)
+ struct xlog_in_core *iclog,
+ bool aborted)
{
- struct xlog *log = iclog->ic_log;
+ struct xlog *log = iclog->ic_log;
spin_lock(&log->l_icloglock);
ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_IOERROR);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
- ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
-
/*
* If we got an error, either on the first buffer, or in the case of
@@ -2962,13 +2858,8 @@ xlog_state_done_syncing(
* and none should ever be attempted to be written to disk
* again.
*/
- if (iclog->ic_state != XLOG_STATE_IOERROR) {
- if (--iclog->ic_bwritecnt == 1) {
- spin_unlock(&log->l_icloglock);
- return;
- }
+ if (iclog->ic_state != XLOG_STATE_IOERROR)
iclog->ic_state = XLOG_STATE_DONE_SYNC;
- }
/*
* Someone could be sleeping prior to writing out the next
@@ -3237,7 +3128,7 @@ xlog_state_release_iclog(
* flags after this point.
*/
if (sync)
- return xlog_sync(log, iclog);
+ xlog_sync(log, iclog);
return 0;
} /* xlog_state_release_iclog */
@@ -3828,8 +3719,7 @@ STATIC void
xlog_verify_iclog(
struct xlog *log,
struct xlog_in_core *iclog,
- int count,
- bool syncing)
+ int count)
{
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
@@ -3873,7 +3763,7 @@ xlog_verify_iclog(
/* clientid is only 1 byte */
p = &ophead->oh_clientid;
field_offset = p - base_ptr;
- if (!syncing || (field_offset & 0x1ff)) {
+ if (field_offset & 0x1ff) {
clientid = ophead->oh_clientid;
} else {
idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
@@ -3896,7 +3786,7 @@ xlog_verify_iclog(
/* check length */
p = &ophead->oh_len;
field_offset = p - base_ptr;
- if (!syncing || (field_offset & 0x1ff)) {
+ if (field_offset & 0x1ff) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
idx = BTOBBT((uintptr_t)&ophead->oh_len -
@@ -4033,7 +3923,7 @@ xfs_log_force_umount(
* avoid races.
*/
wake_up_all(&log->l_cilp->xc_commit_wait);
- xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
+ xlog_state_do_callback(log, true, NULL);
#ifdef XFSERRORDEBUG
{
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 73a64bf32f6f..84e06805160f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,6 +6,8 @@
#ifndef __XFS_LOG_H__
#define __XFS_LOG_H__
+struct xfs_cil_ctx;
+
struct xfs_log_vec {
struct xfs_log_vec *lv_next; /* next lv in build list */
int lv_niovecs; /* number of iovecs in lv */
@@ -72,16 +74,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
}
/*
- * Structure used to pass callback function and the function's argument
- * to the log manager.
- */
-typedef struct xfs_log_callback {
- struct xfs_log_callback *cb_next;
- void (*cb_func)(void *, int);
- void *cb_arg;
-} xfs_log_callback_t;
-
-/*
* By comparing each component, we don't have to worry about extra
* endian issues in treating two 32 bit numbers as one 64 bit number
*/
@@ -125,12 +117,10 @@ int xfs_log_mount(struct xfs_mount *mp,
xfs_daddr_t start_block,
int num_bblocks);
int xfs_log_mount_finish(struct xfs_mount *mp);
-int xfs_log_mount_cancel(struct xfs_mount *);
+void xfs_log_mount_cancel(struct xfs_mount *);
xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
void xfs_log_space_wake(struct xfs_mount *mp);
-int xfs_log_notify(struct xlog_in_core *iclog,
- struct xfs_log_callback *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
struct xlog_in_core *iclog);
int xfs_log_reserve(struct xfs_mount *mp,
@@ -148,6 +138,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_lsn_t *commit_lsn, bool regrant);
+void xlog_cil_process_committed(struct list_head *list, bool aborted);
bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
void xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 5e595948bc5a..fa5602d0fd7f 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -10,10 +10,7 @@
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_error.h"
-#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
-#include "xfs_discard.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_log.h"
@@ -246,7 +243,8 @@ xfs_cil_prepare_item(
* shadow buffer, so update the the pointer to it appropriately.
*/
if (!old_lv) {
- lv->lv_item->li_ops->iop_pin(lv->lv_item);
+ if (lv->lv_item->li_ops->iop_pin)
+ lv->lv_item->li_ops->iop_pin(lv->lv_item);
lv->lv_item->li_lv_shadow = NULL;
} else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,10 +574,9 @@ xlog_discard_busy_extents(
*/
static void
xlog_cil_committed(
- void *args,
- int abort)
+ struct xfs_cil_ctx *ctx,
+ bool abort)
{
- struct xfs_cil_ctx *ctx = args;
struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
/*
@@ -614,6 +611,20 @@ xlog_cil_committed(
kmem_free(ctx);
}
+void
+xlog_cil_process_committed(
+ struct list_head *list,
+ bool aborted)
+{
+ struct xfs_cil_ctx *ctx;
+
+ while ((ctx = list_first_entry_or_null(list,
+ struct xfs_cil_ctx, iclog_entry))) {
+ list_del(&ctx->iclog_entry);
+ xlog_cil_committed(ctx, aborted);
+ }
+}
+
/*
* Push the Committed Item List to the log. If @push_seq flag is zero, then it
* is a background flush and so we can chose to ignore it. Otherwise, if the
@@ -835,12 +846,15 @@ restart:
if (commit_lsn == -1)
goto out_abort;
- /* attach all the transactions w/ busy extents to iclog */
- ctx->log_cb.cb_func = xlog_cil_committed;
- ctx->log_cb.cb_arg = ctx;
- error = xfs_log_notify(commit_iclog, &ctx->log_cb);
- if (error)
+ spin_lock(&commit_iclog->ic_callback_lock);
+ if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
+ spin_unlock(&commit_iclog->ic_callback_lock);
goto out_abort;
+ }
+ ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
+ list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
+ spin_unlock(&commit_iclog->ic_callback_lock);
/*
* now the checkpoint commit is complete and we've attached the
@@ -864,7 +878,7 @@ out_skip:
out_abort_free_ticket:
xfs_log_ticket_put(tic);
out_abort:
- xlog_cil_committed(ctx, XFS_LI_ABORTED);
+ xlog_cil_committed(ctx, true);
return -EIO;
}
@@ -984,6 +998,7 @@ xfs_log_commit_cil(
{
struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
+ struct xfs_log_item *lip, *next;
xfs_lsn_t xc_commit_lsn;
/*
@@ -1008,7 +1023,7 @@ xfs_log_commit_cil(
/*
* Once all the items of the transaction have been copied to the CIL,
- * the items can be unlocked and freed.
+ * the items can be unlocked and possibly freed.
*
* This needs to be done before we drop the CIL context lock because we
* have to update state in the log items and unlock them before they go
@@ -1017,8 +1032,12 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, xc_commit_lsn, false);
-
+ trace_xfs_trans_commit_items(tp, _RET_IP_);
+ list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
+ xfs_trans_del_item(lip);
+ if (lip->li_ops->iop_committing)
+ lip->li_ops->iop_committing(lip, xc_commit_lsn);
+ }
xlog_cil_push_background(log);
up_read(&cil->xc_ctx_lock);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b5f82cb36202..b880c23cb6e4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -10,7 +10,6 @@ struct xfs_buf;
struct xlog;
struct xlog_ticket;
struct xfs_mount;
-struct xfs_log_callback;
/*
* Flags for log structure
@@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
-#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
@@ -179,11 +177,10 @@ typedef struct xlog_ticket {
* the iclog.
* - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
* - ic_next is the pointer to the next iclog in the ring.
- * - ic_bp is a pointer to the buffer used to write this incore log to disk.
* - ic_log is a pointer back to the global log structure.
- * - ic_callback is a linked list of callback function/argument pairs to be
- * called after an iclog finishes writing.
- * - ic_size is the full size of the header plus data.
+ * - ic_size is the full size of the log buffer, minus the cycle headers.
+ * - ic_io_size is the size of the currently pending log buffer write, which
+ * might be smaller than ic_size
* - ic_offset is the current number of bytes written to in this iclog.
* - ic_refcnt is bumped when someone is writing to the log.
* - ic_state is the state of the iclog.
@@ -193,7 +190,7 @@ typedef struct xlog_ticket {
* structure cacheline aligned. The following fields can be contended on
* by independent processes:
*
- * - ic_callback_*
+ * - ic_callbacks
* - ic_refcnt
* - fields protected by the global l_icloglock
*
@@ -206,23 +203,28 @@ typedef struct xlog_in_core {
wait_queue_head_t ic_write_wait;
struct xlog_in_core *ic_next;
struct xlog_in_core *ic_prev;
- struct xfs_buf *ic_bp;
struct xlog *ic_log;
- int ic_size;
- int ic_offset;
- int ic_bwritecnt;
+ u32 ic_size;
+ u32 ic_io_size;
+ u32 ic_offset;
unsigned short ic_state;
char *ic_datap; /* pointer to iclog data */
/* Callback structures need their own cacheline */
spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
- struct xfs_log_callback *ic_callback;
- struct xfs_log_callback **ic_callback_tail;
+ struct list_head ic_callbacks;
/* reference counts need their own cacheline */
atomic_t ic_refcnt ____cacheline_aligned_in_smp;
xlog_in_core_2_t *ic_data;
#define ic_header ic_data->hic_header
+#ifdef DEBUG
+ bool ic_fail_crc : 1;
+#endif
+ struct semaphore ic_sema;
+ struct work_struct ic_end_io_work;
+ struct bio ic_bio;
+ struct bio_vec ic_bvec[];
} xlog_in_core_t;
/*
@@ -243,7 +245,7 @@ struct xfs_cil_ctx {
int space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
struct xfs_log_vec *lv_chain; /* logvecs being pushed */
- struct xfs_log_callback log_cb; /* completion callback hook. */
+ struct list_head iclog_entry;
struct list_head committing; /* ctx committing list */
struct work_struct discard_endio_work;
};
@@ -350,9 +352,8 @@ struct xlog {
struct xfs_mount *l_mp; /* mount point */
struct xfs_ail *l_ailp; /* AIL log is working with */
struct xfs_cil *l_cilp; /* CIL log is working with */
- struct xfs_buf *l_xbuf; /* extra buffer for log
- * wrapping */
struct xfs_buftarg *l_targ; /* buftarg of log */
+ struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */
struct delayed_work l_work; /* background flush work */
uint l_flags;
uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
@@ -361,7 +362,6 @@ struct xlog {
int l_iclog_heads; /* # of iclog header sectors */
uint l_sectBBsize; /* sector size in BBs (2^n) */
int l_iclog_size; /* size of log in bytes */
- int l_iclog_size_log; /* log power size of log */
int l_iclog_bufs; /* number of iclog buffers */
xfs_daddr_t l_logBBstart; /* start block of log */
int l_logsize; /* size of log in bytes */
@@ -418,7 +418,7 @@ xlog_recover(
extern int
xlog_recover_finish(
struct xlog *log);
-extern int
+extern void
xlog_recover_cancel(struct xlog *);
extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9329f5adbfbe..13d1d3e95b88 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -13,8 +13,6 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
@@ -26,7 +24,6 @@
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
-#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
@@ -79,7 +76,7 @@ struct xfs_buf_cancel {
* are valid, false otherwise.
*/
static inline bool
-xlog_verify_bp(
+xlog_verify_bno(
struct xlog *log,
xfs_daddr_t blk_no,
int bbcount)
@@ -92,22 +89,19 @@ xlog_verify_bp(
}
/*
- * Allocate a buffer to hold log data. The buffer needs to be able
- * to map to a range of nbblks basic blocks at any valid (basic
- * block) offset within the log.
+ * Allocate a buffer to hold log data. The buffer needs to be able to map to
+ * a range of nbblks basic blocks at any valid offset within the log.
*/
-STATIC xfs_buf_t *
-xlog_get_bp(
+static char *
+xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
- struct xfs_buf *bp;
-
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
*/
- if (!xlog_verify_bp(log, 0, nbblks)) {
+ if (!xlog_verify_bno(log, 0, nbblks)) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -115,69 +109,48 @@ xlog_get_bp(
}
/*
- * We do log I/O in units of log sectors (a power-of-2
- * multiple of the basic block size), so we round up the
- * requested size to accommodate the basic blocks required
- * for complete log sectors.
+ * We do log I/O in units of log sectors (a power-of-2 multiple of the
+ * basic block size), so we round up the requested size to accommodate
+ * the basic blocks required for complete log sectors.
*
- * In addition, the buffer may be used for a non-sector-
- * aligned block offset, in which case an I/O of the
- * requested size could extend beyond the end of the
- * buffer. If the requested size is only 1 basic block it
- * will never straddle a sector boundary, so this won't be
- * an issue. Nor will this be a problem if the log I/O is
- * done in basic blocks (sector size 1). But otherwise we
- * extend the buffer by one extra log sector to ensure
- * there's space to accommodate this possibility.
+ * In addition, the buffer may be used for a non-sector-aligned block
+ * offset, in which case an I/O of the requested size could extend
+ * beyond the end of the buffer. If the requested size is only 1 basic
+ * block it will never straddle a sector boundary, so this won't be an
+ * issue. Nor will this be a problem if the log I/O is done in basic
+ * blocks (sector size 1). But otherwise we extend the buffer by one
+ * extra log sector to ensure there's space to accommodate this
+ * possibility.
*/
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
-
- bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
- if (bp)
- xfs_buf_unlock(bp);
- return bp;
-}
-
-STATIC void
-xlog_put_bp(
- xfs_buf_t *bp)
-{
- xfs_buf_free(bp);
+ return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
}
/*
* Return the address of the start of the given block number's data
* in a log buffer. The buffer covers a log sector-aligned region.
*/
-STATIC char *
+static inline unsigned int
xlog_align(
struct xlog *log,
- xfs_daddr_t blk_no,
- int nbblks,
- struct xfs_buf *bp)
+ xfs_daddr_t blk_no)
{
- xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
-
- ASSERT(offset + nbblks <= bp->b_length);
- return bp->b_addr + BBTOB(offset);
+ return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
}
-
-/*
- * nbblks should be uint, but oh well. Just want to catch that 32-bit length.
- */
-STATIC int
-xlog_bread_noalign(
- struct xlog *log,
- xfs_daddr_t blk_no,
- int nbblks,
- struct xfs_buf *bp)
+static int
+xlog_do_io(
+ struct xlog *log,
+ xfs_daddr_t blk_no,
+ unsigned int nbblks,
+ char *data,
+ unsigned int op)
{
- int error;
+ int error;
- if (!xlog_verify_bp(log, blk_no, nbblks)) {
+ if (!xlog_verify_bno(log, blk_no, nbblks)) {
xfs_warn(log->l_mp,
"Invalid log block/length (0x%llx, 0x%x) for buffer",
blk_no, nbblks);
@@ -187,107 +160,53 @@ xlog_bread_noalign(
blk_no = round_down(blk_no, log->l_sectBBsize);
nbblks = round_up(nbblks, log->l_sectBBsize);
-
ASSERT(nbblks > 0);
- ASSERT(nbblks <= bp->b_length);
-
- XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- bp->b_flags |= XBF_READ;
- bp->b_io_length = nbblks;
- bp->b_error = 0;
- error = xfs_buf_submit(bp);
- if (error && !XFS_FORCED_SHUTDOWN(log->l_mp))
- xfs_buf_ioerror_alert(bp, __func__);
+ error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
+ BBTOB(nbblks), data, op);
+ if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ xfs_alert(log->l_mp,
+ "log recovery %s I/O error at daddr 0x%llx len %d error %d",
+ op == REQ_OP_WRITE ? "write" : "read",
+ blk_no, nbblks, error);
+ }
return error;
}
STATIC int
-xlog_bread(
+xlog_bread_noalign(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- struct xfs_buf *bp,
- char **offset)
+ char *data)
{
- int error;
-
- error = xlog_bread_noalign(log, blk_no, nbblks, bp);
- if (error)
- return error;
-
- *offset = xlog_align(log, blk_no, nbblks, bp);
- return 0;
+ return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
}
-/*
- * Read at an offset into the buffer. Returns with the buffer in it's original
- * state regardless of the result of the read.
- */
STATIC int
-xlog_bread_offset(
+xlog_bread(
struct xlog *log,
- xfs_daddr_t blk_no, /* block to read from */
- int nbblks, /* blocks to read */
- struct xfs_buf *bp,
- char *offset)
+ xfs_daddr_t blk_no,
+ int nbblks,
+ char *data,
+ char **offset)
{
- char *orig_offset = bp->b_addr;
- int orig_len = BBTOB(bp->b_length);
- int error, error2;
-
- error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
- if (error)
- return error;
-
- error = xlog_bread_noalign(log, blk_no, nbblks, bp);
+ int error;
- /* must reset buffer pointer even on error */
- error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
- if (error)
- return error;
- return error2;
+ error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
+ if (!error)
+ *offset = data + xlog_align(log, blk_no);
+ return error;
}
-/*
- * Write out the buffer at the given block for the given number of blocks.
- * The buffer is kept locked across the write and is returned locked.
- * This can only be used for synchronous log writes.
- */
STATIC int
xlog_bwrite(
struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- struct xfs_buf *bp)
+ char *data)
{
- int error;
-
- if (!xlog_verify_bp(log, blk_no, nbblks)) {
- xfs_warn(log->l_mp,
- "Invalid log block/length (0x%llx, 0x%x) for buffer",
- blk_no, nbblks);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
- return -EFSCORRUPTED;
- }
-
- blk_no = round_down(blk_no, log->l_sectBBsize);
- nbblks = round_up(nbblks, log->l_sectBBsize);
-
- ASSERT(nbblks > 0);
- ASSERT(nbblks <= bp->b_length);
-
- XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- xfs_buf_hold(bp);
- xfs_buf_lock(bp);
- bp->b_io_length = nbblks;
- bp->b_error = 0;
-
- error = xfs_bwrite(bp);
- if (error)
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_buf_relse(bp);
- return error;
+ return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
}
#ifdef DEBUG
@@ -377,10 +296,9 @@ xlog_recover_iodone(
* We're not going to bother about retrying
* this during recovery. One strike!
*/
- if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
+ if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
xfs_buf_ioerror_alert(bp, __func__);
- xfs_force_shutdown(bp->b_target->bt_mount,
- SHUTDOWN_META_IO_ERROR);
+ xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
}
}
@@ -405,7 +323,7 @@ xlog_recover_iodone(
STATIC int
xlog_find_cycle_start(
struct xlog *log,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t first_blk,
xfs_daddr_t *last_blk,
uint cycle)
@@ -419,7 +337,7 @@ xlog_find_cycle_start(
end_blk = *last_blk;
mid_blk = BLK_AVG(first_blk, end_blk);
while (mid_blk != first_blk && mid_blk != end_blk) {
- error = xlog_bread(log, mid_blk, 1, bp, &offset);
+ error = xlog_bread(log, mid_blk, 1, buffer, &offset);
if (error)
return error;
mid_cycle = xlog_get_cycle(offset);
@@ -455,7 +373,7 @@ xlog_find_verify_cycle(
{
xfs_daddr_t i, j;
uint cycle;
- xfs_buf_t *bp;
+ char *buffer;
xfs_daddr_t bufblks;
char *buf = NULL;
int error = 0;
@@ -469,7 +387,7 @@ xlog_find_verify_cycle(
bufblks = 1 << ffs(nbblks);
while (bufblks > log->l_logBBsize)
bufblks >>= 1;
- while (!(bp = xlog_get_bp(log, bufblks))) {
+ while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
bufblks >>= 1;
if (bufblks < log->l_sectBBsize)
return -ENOMEM;
@@ -480,7 +398,7 @@ xlog_find_verify_cycle(
bcount = min(bufblks, (start_blk + nbblks - i));
- error = xlog_bread(log, i, bcount, bp, &buf);
+ error = xlog_bread(log, i, bcount, buffer, &buf);
if (error)
goto out;
@@ -498,7 +416,7 @@ xlog_find_verify_cycle(
*new_blk = -1;
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -522,7 +440,7 @@ xlog_find_verify_log_record(
int extra_bblks)
{
xfs_daddr_t i;
- xfs_buf_t *bp;
+ char *buffer;
char *offset = NULL;
xlog_rec_header_t *head = NULL;
int error = 0;
@@ -532,12 +450,14 @@ xlog_find_verify_log_record(
ASSERT(start_blk != 0 || *last_blk != start_blk);
- if (!(bp = xlog_get_bp(log, num_blks))) {
- if (!(bp = xlog_get_bp(log, 1)))
+ buffer = xlog_alloc_buffer(log, num_blks);
+ if (!buffer) {
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
smallmem = 1;
} else {
- error = xlog_bread(log, start_blk, num_blks, bp, &offset);
+ error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
if (error)
goto out;
offset += ((num_blks - 1) << BBSHIFT);
@@ -554,7 +474,7 @@ xlog_find_verify_log_record(
}
if (smallmem) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out;
}
@@ -607,7 +527,7 @@ xlog_find_verify_log_record(
*last_blk = i;
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -629,7 +549,7 @@ xlog_find_head(
struct xlog *log,
xfs_daddr_t *return_head_blk)
{
- xfs_buf_t *bp;
+ char *buffer;
char *offset;
xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
int num_scan_bblks;
@@ -659,20 +579,20 @@ xlog_find_head(
}
first_blk = 0; /* get cycle # of 1st block */
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
first_half_cycle = xlog_get_cycle(offset);
last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
- error = xlog_bread(log, last_blk, 1, bp, &offset);
+ error = xlog_bread(log, last_blk, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
last_half_cycle = xlog_get_cycle(offset);
ASSERT(last_half_cycle != 0);
@@ -740,9 +660,10 @@ xlog_find_head(
* ^ we want to locate this spot
*/
stop_on_cycle = last_half_cycle;
- if ((error = xlog_find_cycle_start(log, bp, first_blk,
- &head_blk, last_half_cycle)))
- goto bp_err;
+ error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
+ last_half_cycle);
+ if (error)
+ goto out_free_buffer;
}
/*
@@ -762,7 +683,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log,
start_blk, num_scan_bblks,
stop_on_cycle, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
head_blk = new_blk;
} else { /* need to read 2 parts of log */
@@ -799,7 +720,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log, start_blk,
num_scan_bblks - (int)head_blk,
(stop_on_cycle - 1), &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1) {
head_blk = new_blk;
goto validate_head;
@@ -815,7 +736,7 @@ xlog_find_head(
if ((error = xlog_find_verify_cycle(log,
start_blk, (int)head_blk,
stop_on_cycle, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
head_blk = new_blk;
}
@@ -834,13 +755,13 @@ validate_head:
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
} else {
start_blk = 0;
ASSERT(head_blk <= INT_MAX);
error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
if (error < 0)
- goto bp_err;
+ goto out_free_buffer;
if (error == 1) {
/* We hit the beginning of the log during our search */
start_blk = log_bbnum - (num_scan_bblks - head_blk);
@@ -853,14 +774,14 @@ validate_head:
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != log_bbnum)
head_blk = new_blk;
} else if (error)
- goto bp_err;
+ goto out_free_buffer;
}
- xlog_put_bp(bp);
+ kmem_free(buffer);
if (head_blk == log_bbnum)
*return_head_blk = 0;
else
@@ -873,9 +794,8 @@ validate_head:
*/
return 0;
- bp_err:
- xlog_put_bp(bp);
-
+out_free_buffer:
+ kmem_free(buffer);
if (error)
xfs_warn(log->l_mp, "failed to find log head");
return error;
@@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr(
*/
end_blk = head_blk > tail_blk ? tail_blk : 0;
for (i = (int) head_blk - 1; i >= end_blk; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr(
*/
if (tail_blk >= head_blk && found != count) {
for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -969,7 +889,7 @@ xlog_seek_logrec_hdr(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int count,
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rblk,
struct xlog_rec_header **rhead,
bool *wrapped)
@@ -988,7 +908,7 @@ xlog_seek_logrec_hdr(
*/
end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
for (i = (int) tail_blk; i <= end_blk; i++) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr(
*/
if (tail_blk > head_blk && found != count) {
for (i = 0; i < (int) head_blk; i++) {
- error = xlog_bread(log, i, 1, bp, &offset);
+ error = xlog_bread(log, i, 1, buffer, &offset);
if (error)
goto out_error;
@@ -1069,22 +989,22 @@ xlog_verify_tail(
int hsize)
{
struct xlog_rec_header *thead;
- struct xfs_buf *bp;
+ char *buffer;
xfs_daddr_t first_bad;
int error = 0;
bool wrapped;
xfs_daddr_t tmp_tail;
xfs_daddr_t orig_tail = *tail_blk;
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
/*
* Make sure the tail points to a record (returns positive count on
* success).
*/
- error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+ error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
&tmp_tail, &thead, &wrapped);
if (error < 0)
goto out;
@@ -1113,8 +1033,8 @@ xlog_verify_tail(
break;
/* skip to the next record; returns positive count on success */
- error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
- &tmp_tail, &thead, &wrapped);
+ error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
+ buffer, &tmp_tail, &thead, &wrapped);
if (error < 0)
goto out;
@@ -1129,7 +1049,7 @@ xlog_verify_tail(
"Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
orig_tail, *tail_blk);
out:
- xlog_put_bp(bp);
+ kmem_free(buffer);
return error;
}
@@ -1151,13 +1071,13 @@ xlog_verify_head(
struct xlog *log,
xfs_daddr_t *head_blk, /* in/out: unverified head */
xfs_daddr_t *tail_blk, /* out: tail block */
- struct xfs_buf *bp,
+ char *buffer,
xfs_daddr_t *rhead_blk, /* start blk of last record */
struct xlog_rec_header **rhead, /* ptr to last record */
bool *wrapped) /* last rec. wraps phys. log */
{
struct xlog_rec_header *tmp_rhead;
- struct xfs_buf *tmp_bp;
+ char *tmp_buffer;
xfs_daddr_t first_bad;
xfs_daddr_t tmp_rhead_blk;
int found;
@@ -1168,15 +1088,15 @@ xlog_verify_head(
* Check the head of the log for torn writes. Search backwards from the
* head until we hit the tail or the maximum number of log record I/Os
* that could have been in flight at one time. Use a temporary buffer so
- * we don't trash the rhead/bp pointers from the caller.
+ * we don't trash the rhead/buffer pointers from the caller.
*/
- tmp_bp = xlog_get_bp(log, 1);
- if (!tmp_bp)
+ tmp_buffer = xlog_alloc_buffer(log, 1);
+ if (!tmp_buffer)
return -ENOMEM;
error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
- XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
- &tmp_rhead, &tmp_wrapped);
- xlog_put_bp(tmp_bp);
+ XLOG_MAX_ICLOGS, tmp_buffer,
+ &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
+ kmem_free(tmp_buffer);
if (error < 0)
return error;
@@ -1205,8 +1125,8 @@ xlog_verify_head(
* (i.e., the records with invalid CRC) if the cycle number
* matches the the current cycle.
*/
- found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
- rhead_blk, rhead, wrapped);
+ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
+ buffer, rhead_blk, rhead, wrapped);
if (found < 0)
return found;
if (found == 0) /* XXX: right thing to do here? */
@@ -1266,7 +1186,7 @@ xlog_check_unmount_rec(
xfs_daddr_t *tail_blk,
struct xlog_rec_header *rhead,
xfs_daddr_t rhead_blk,
- struct xfs_buf *bp,
+ char *buffer,
bool *clean)
{
struct xlog_op_header *op_head;
@@ -1309,7 +1229,7 @@ xlog_check_unmount_rec(
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
- error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+ error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
if (error)
return error;
@@ -1388,7 +1308,7 @@ xlog_find_tail(
{
xlog_rec_header_t *rhead;
char *offset = NULL;
- xfs_buf_t *bp;
+ char *buffer;
int error;
xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn;
@@ -1402,11 +1322,11 @@ xlog_find_tail(
return error;
ASSERT(*head_blk < INT_MAX);
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
if (*head_blk == 0) { /* special case */
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
goto done;
@@ -1422,7 +1342,7 @@ xlog_find_tail(
* block. This wraps all the way back around to the head so something is
* seriously wrong if we can't find it.
*/
- error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+ error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
&rhead_blk, &rhead, &wrapped);
if (error < 0)
return error;
@@ -1443,7 +1363,7 @@ xlog_find_tail(
* state to determine whether recovery is necessary.
*/
error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
- rhead_blk, bp, &clean);
+ rhead_blk, buffer, &clean);
if (error)
goto done;
@@ -1460,7 +1380,7 @@ xlog_find_tail(
if (!clean) {
xfs_daddr_t orig_head = *head_blk;
- error = xlog_verify_head(log, head_blk, tail_blk, bp,
+ error = xlog_verify_head(log, head_blk, tail_blk, buffer,
&rhead_blk, &rhead, &wrapped);
if (error)
goto done;
@@ -1471,7 +1391,7 @@ xlog_find_tail(
wrapped);
tail_lsn = atomic64_read(&log->l_tail_lsn);
error = xlog_check_unmount_rec(log, head_blk, tail_blk,
- rhead, rhead_blk, bp,
+ rhead, rhead_blk, buffer,
&clean);
if (error)
goto done;
@@ -1505,11 +1425,11 @@ xlog_find_tail(
* But... if the -device- itself is readonly, just skip this.
* We can't recover this device anyway, so it won't matter.
*/
- if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
+ if (!xfs_readonly_buftarg(log->l_targ))
error = xlog_clear_stale_blocks(log, tail_lsn);
done:
- xlog_put_bp(bp);
+ kmem_free(buffer);
if (error)
xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1537,7 +1457,7 @@ xlog_find_zeroed(
struct xlog *log,
xfs_daddr_t *blk_no)
{
- xfs_buf_t *bp;
+ char *buffer;
char *offset;
uint first_cycle, last_cycle;
xfs_daddr_t new_blk, last_blk, start_blk;
@@ -1547,35 +1467,36 @@ xlog_find_zeroed(
*blk_no = 0;
/* check totally zeroed log */
- bp = xlog_get_bp(log, 1);
- if (!bp)
+ buffer = xlog_alloc_buffer(log, 1);
+ if (!buffer)
return -ENOMEM;
- error = xlog_bread(log, 0, 1, bp, &offset);
+ error = xlog_bread(log, 0, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
first_cycle = xlog_get_cycle(offset);
if (first_cycle == 0) { /* completely zeroed log */
*blk_no = 0;
- xlog_put_bp(bp);
+ kmem_free(buffer);
return 1;
}
/* check partially zeroed log */
- error = xlog_bread(log, log_bbnum-1, 1, bp, &offset);
+ error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
if (error)
- goto bp_err;
+ goto out_free_buffer;
last_cycle = xlog_get_cycle(offset);
if (last_cycle != 0) { /* log completely written to */
- xlog_put_bp(bp);
+ kmem_free(buffer);
return 0;
}
/* we have a partially zeroed log */
last_blk = log_bbnum-1;
- if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0)))
- goto bp_err;
+ error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
+ if (error)
+ goto out_free_buffer;
/*
* Validate the answer. Because there is no way to guarantee that
@@ -1598,7 +1519,7 @@ xlog_find_zeroed(
*/
if ((error = xlog_find_verify_cycle(log, start_blk,
(int)num_scan_bblks, 0, &new_blk)))
- goto bp_err;
+ goto out_free_buffer;
if (new_blk != -1)
last_blk = new_blk;
@@ -1610,11 +1531,11 @@ xlog_find_zeroed(
if (error == 1)
error = -EIO;
if (error)
- goto bp_err;
+ goto out_free_buffer;
*blk_no = last_blk;
-bp_err:
- xlog_put_bp(bp);
+out_free_buffer:
+ kmem_free(buffer);
if (error)
return error;
return 1;
@@ -1657,7 +1578,7 @@ xlog_write_log_records(
int tail_block)
{
char *offset;
- xfs_buf_t *bp;
+ char *buffer;
int balign, ealign;
int sectbb = log->l_sectBBsize;
int end_block = start_block + blocks;
@@ -1674,7 +1595,7 @@ xlog_write_log_records(
bufblks = 1 << ffs(blocks);
while (bufblks > log->l_logBBsize)
bufblks >>= 1;
- while (!(bp = xlog_get_bp(log, bufblks))) {
+ while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
bufblks >>= 1;
if (bufblks < sectbb)
return -ENOMEM;
@@ -1686,9 +1607,9 @@ xlog_write_log_records(
*/
balign = round_down(start_block, sectbb);
if (balign != start_block) {
- error = xlog_bread_noalign(log, start_block, 1, bp);
+ error = xlog_bread_noalign(log, start_block, 1, buffer);
if (error)
- goto out_put_bp;
+ goto out_free_buffer;
j = start_block - balign;
}
@@ -1705,29 +1626,28 @@ xlog_write_log_records(
*/
ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
- offset = bp->b_addr + BBTOB(ealign - start_block);
- error = xlog_bread_offset(log, ealign, sectbb,
- bp, offset);
+ error = xlog_bread_noalign(log, ealign, sectbb,
+ buffer + BBTOB(ealign - start_block));
if (error)
break;
}
- offset = xlog_align(log, start_block, endcount, bp);
+ offset = buffer + xlog_align(log, start_block);
for (; j < endcount; j++) {
xlog_add_record(log, offset, cycle, i+j,
tail_cycle, tail_block);
offset += BBSIZE;
}
- error = xlog_bwrite(log, start_block, endcount, bp);
+ error = xlog_bwrite(log, start_block, endcount, buffer);
if (error)
break;
start_block += endcount;
j = 0;
}
- out_put_bp:
- xlog_put_bp(bp);
+out_free_buffer:
+ kmem_free(buffer);
return error;
}
@@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer(
if (xfs_sb_version_hascrc(&mp->m_sb))
bp->b_ops = &xfs_inode_buf_ops;
- inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog;
+ inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
for (i = 0; i < inodes_per_buf; i++) {
next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
offsetof(xfs_dinode_t, di_next_unlinked);
@@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer(
ASSERT(item->ri_buf[item_index].i_addr != NULL);
ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
- ASSERT((reg_buf_offset + reg_buf_bytes) <=
- BBTOB(bp->b_io_length));
+ ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
/*
* The current logged region contains a copy of the
@@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer(
ASSERT(nbits > 0);
ASSERT(item->ri_buf[i].i_addr != NULL);
ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
- ASSERT(BBTOB(bp->b_io_length) >=
+ ASSERT(BBTOB(bp->b_length) >=
((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
/*
@@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2(
*
* Also make sure that only inode buffers with good sizes stay in
* the buffer cache. The kernel moves inodes in buffers of 1 block
- * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode
+ * or inode_cluster_size bytes, whichever is bigger. The inode
* buffers in the log can be a different size if the log was generated
* by an older kernel using unclustered inode buffers or a newer kernel
* running with a different inode cluster size. Regardless, if the
- * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size)
- * for *our* value of mp->m_inode_cluster_size, then we need to keep
+ * the inode buffer size isn't max(blocksize, inode_cluster_size)
+ * for *our* value of inode_cluster_size, then we need to keep
* the buffer out of the buffer cache so that the buffer won't
* overlap with future reads of those inodes.
*/
if (XFS_DINODE_MAGIC ==
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
- (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize,
- (uint32_t)log->l_mp->m_inode_cluster_size))) {
+ (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
xfs_buf_stale(bp);
error = xfs_bwrite(bp);
} else {
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
}
@@ -3260,7 +3178,7 @@ out_owner_change:
/* re-generate the checksum. */
xfs_dinode_calc_crc(log->l_mp, dip);
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
@@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2(
}
ASSERT(dq_f->qlf_size == 2);
- ASSERT(bp->b_target->bt_mount == mp);
+ ASSERT(bp->b_mount == mp);
bp->b_iodone = xlog_recover_iodone;
xfs_buf_delwri_queue(bp, buffer_list);
@@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2(
{
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
uint64_t efi_id;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp = log->l_ailp;
@@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2(
{
struct xfs_mount *mp = log->l_mp;
struct xfs_icreate_log *icl;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_agnumber_t agno;
xfs_agblock_t agbno;
unsigned int count;
@@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2(
/*
* The inode chunk is either full or sparse and we only support
- * m_ialloc_min_blks sized sparse allocations at this time.
+ * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
*/
- if (length != mp->m_ialloc_blks &&
- length != mp->m_ialloc_min_blks) {
+ if (length != igeo->ialloc_blks &&
+ length != igeo->ialloc_min_blks) {
xfs_warn(log->l_mp,
"%s: unsupported chunk length", __FUNCTION__);
return -EINVAL;
@@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2(
* buffers for cancellation so we don't overwrite anything written after
* a cancellation.
*/
- bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster);
- nbufs = length / mp->m_blocks_per_cluster;
+ bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
+ nbufs = length / igeo->blocks_per_cluster;
for (i = 0, cancel_count = 0; i < nbufs; i++) {
xfs_daddr_t daddr;
daddr = XFS_AGB_TO_DADDR(mp, agno,
- agbno + i * mp->m_blocks_per_cluster);
+ agbno + i * igeo->blocks_per_cluster);
if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
cancel_count++;
}
@@ -4956,12 +4875,11 @@ out:
* A cancel occurs when the mount has failed and we're bailing out.
* Release all pending log intent items so they don't pin the AIL.
*/
-STATIC int
+STATIC void
xlog_recover_cancel_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
@@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents(
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->ail_lock);
- return error;
}
/*
@@ -5307,7 +5224,7 @@ xlog_do_recovery_pass(
xfs_daddr_t blk_no, rblk_no;
xfs_daddr_t rhead_blk;
char *offset;
- xfs_buf_t *hbp, *dbp;
+ char *hbp, *dbp;
int error = 0, h_size, h_len;
int error2 = 0;
int bblks, split_bblks;
@@ -5332,7 +5249,7 @@ xlog_do_recovery_pass(
* iclog header and extract the header size from it. Get a
* new hbp that is the correct size.
*/
- hbp = xlog_get_bp(log, 1);
+ hbp = xlog_alloc_buffer(log, 1);
if (!hbp)
return -ENOMEM;
@@ -5374,23 +5291,23 @@ xlog_do_recovery_pass(
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
if (h_size % XLOG_HEADER_CYCLE_SIZE)
hblks++;
- xlog_put_bp(hbp);
- hbp = xlog_get_bp(log, hblks);
+ kmem_free(hbp);
+ hbp = xlog_alloc_buffer(log, hblks);
} else {
hblks = 1;
}
} else {
ASSERT(log->l_sectBBsize == 1);
hblks = 1;
- hbp = xlog_get_bp(log, 1);
+ hbp = xlog_alloc_buffer(log, 1);
h_size = XLOG_BIG_RECORD_BSIZE;
}
if (!hbp)
return -ENOMEM;
- dbp = xlog_get_bp(log, BTOBB(h_size));
+ dbp = xlog_alloc_buffer(log, BTOBB(h_size));
if (!dbp) {
- xlog_put_bp(hbp);
+ kmem_free(hbp);
return -ENOMEM;
}
@@ -5405,7 +5322,7 @@ xlog_do_recovery_pass(
/*
* Check for header wrapping around physical end-of-log
*/
- offset = hbp->b_addr;
+ offset = hbp;
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
@@ -5441,8 +5358,8 @@ xlog_do_recovery_pass(
* - order is important.
*/
wrapped_hblks = hblks - split_hblks;
- error = xlog_bread_offset(log, 0,
- wrapped_hblks, hbp,
+ error = xlog_bread_noalign(log, 0,
+ wrapped_hblks,
offset + BBTOB(split_hblks));
if (error)
goto bread_err2;
@@ -5473,7 +5390,7 @@ xlog_do_recovery_pass(
} else {
/* This log record is split across the
* physical end of log */
- offset = dbp->b_addr;
+ offset = dbp;
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
@@ -5502,8 +5419,8 @@ xlog_do_recovery_pass(
* _first_, then the log start (LR header end)
* - order is important.
*/
- error = xlog_bread_offset(log, 0,
- bblks - split_bblks, dbp,
+ error = xlog_bread_noalign(log, 0,
+ bblks - split_bblks,
offset + BBTOB(split_bblks));
if (error)
goto bread_err2;
@@ -5551,9 +5468,9 @@ xlog_do_recovery_pass(
}
bread_err2:
- xlog_put_bp(dbp);
+ kmem_free(dbp);
bread_err1:
- xlog_put_bp(hbp);
+ kmem_free(hbp);
/*
* Submit buffers that have been added from the last record processed,
@@ -5687,7 +5604,7 @@ xlog_do_recover(
* Now that we've finished replaying all buffer and inode
* updates, re-read in the superblock and reverify it.
*/
- bp = xfs_getsb(mp, 0);
+ bp = xfs_getsb(mp);
bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
ASSERT(!(bp->b_flags & XBF_WRITE));
bp->b_flags |= XBF_READ;
@@ -5860,16 +5777,12 @@ xlog_recover_finish(
return 0;
}
-int
+void
xlog_recover_cancel(
struct xlog *log)
{
- int error = 0;
-
if (log->l_flags & XLOG_RECOVERY_NEEDED)
- error = xlog_recover_cancel_intents(log);
-
- return error;
+ xlog_recover_cancel_intents(log);
}
#if defined(DEBUG)
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 6b736ea58d35..9804efe525a9 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -6,8 +6,8 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_error.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
-#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6b2bfe81dc51..322da6909290 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_ialloc.h"
@@ -27,7 +24,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_fsops.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_sysfs.h"
#include "xfs_rmap_btree.h"
@@ -430,30 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp)
}
/*
- * Set the maximum inode count for this filesystem
- */
-STATIC void
-xfs_set_maxicount(xfs_mount_t *mp)
-{
- xfs_sb_t *sbp = &(mp->m_sb);
- uint64_t icount;
-
- if (sbp->sb_imax_pct) {
- /*
- * Make sure the maximum inode count is a multiple
- * of the units we allocate inodes in.
- */
- icount = sbp->sb_dblocks * sbp->sb_imax_pct;
- do_div(icount, 100);
- do_div(icount, mp->m_ialloc_blks);
- mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
- sbp->sb_inopblog;
- } else {
- mp->m_maxicount = 0;
- }
-}
-
-/*
* Set the default minimum read and write sizes unless
* already specified in a mount option.
* We use smaller I/O sizes when the file system
@@ -509,29 +481,6 @@ xfs_set_low_space_thresholds(
}
}
-
-/*
- * Set whether we're using inode alignment.
- */
-STATIC void
-xfs_set_inoalignment(xfs_mount_t *mp)
-{
- if (xfs_sb_version_hasalign(&mp->m_sb) &&
- mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
- mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
- else
- mp->m_inoalign_mask = 0;
- /*
- * If we are using stripe alignment, check whether
- * the stripe unit is a multiple of the inode alignment
- */
- if (mp->m_dalign && mp->m_inoalign_mask &&
- !(mp->m_dalign & mp->m_inoalign_mask))
- mp->m_sinoalign = mp->m_dalign;
- else
- mp->m_sinoalign = 0;
-}
-
/*
* Check that the data (and log if separate) is an ok size.
*/
@@ -683,6 +632,7 @@ xfs_mountfs(
{
struct xfs_sb *sbp = &(mp->m_sb);
struct xfs_inode *rip;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
uint64_t resblks;
uint quotamount = 0;
uint quotaflags = 0;
@@ -749,12 +699,10 @@ xfs_mountfs(
xfs_alloc_compute_maxlevels(mp);
xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
- xfs_ialloc_compute_maxlevels(mp);
+ xfs_ialloc_setup_geometry(mp);
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
- xfs_set_maxicount(mp);
-
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
@@ -788,50 +736,22 @@ xfs_mountfs(
xfs_set_low_space_thresholds(mp);
/*
- * Set the inode cluster size.
- * This may still be overridden by the file system
- * block size if it is larger than the chosen cluster size.
- *
- * For v5 filesystems, scale the cluster size with the inode size to
- * keep a constant ratio of inode per cluster buffer, but only if mkfs
- * has set the inode alignment value appropriately for larger cluster
- * sizes.
- */
- mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
- int new_size = mp->m_inode_cluster_size;
-
- new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
- if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
- mp->m_inode_cluster_size = new_size;
- }
- mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
- mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
- mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
- mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
-
- /*
* If enabled, sparse inode chunk alignment is expected to match the
* cluster size. Full inode chunk alignment must match the chunk size,
* but that is checked on sb read verification...
*/
if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
mp->m_sb.sb_spino_align !=
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) {
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
xfs_warn(mp,
"Sparse inode block alignment (%u) must match cluster size (%llu).",
mp->m_sb.sb_spino_align,
- XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size));
+ XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
error = -EINVAL;
goto out_remove_uuid;
}
/*
- * Set inode alignment fields
- */
- xfs_set_inoalignment(mp);
-
- /*
* Check that the data (and log if separate) is an ok size.
*/
error = xfs_check_sizes(mp);
@@ -1385,24 +1305,14 @@ xfs_mod_frextents(
* xfs_getsb() is called to obtain the buffer for the superblock.
* The buffer is returned locked and read in from disk.
* The buffer should be released with a call to xfs_brelse().
- *
- * If the flags parameter is BUF_TRYLOCK, then we'll only return
- * the superblock buffer if it can be locked without sleeping.
- * If it can't then we'll return NULL.
*/
struct xfs_buf *
xfs_getsb(
- struct xfs_mount *mp,
- int flags)
+ struct xfs_mount *mp)
{
struct xfs_buf *bp = mp->m_sb_bp;
- if (!xfs_buf_trylock(bp)) {
- if (flags & XBF_TRYLOCK)
- return NULL;
- xfs_buf_lock(bp);
- }
-
+ xfs_buf_lock(bp);
xfs_buf_hold(bp);
ASSERT(bp->b_flags & XBF_DONE);
return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c81a5cd7c228..4adb6837439a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -105,6 +105,7 @@ typedef struct xfs_mount {
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
+ struct xfs_ino_geometry m_ino_geo; /* inode geometry */
int m_logbufs; /* number of log buffers */
int m_logbsize; /* size of each log buffer */
uint m_rsumlevels; /* rt summary levels */
@@ -126,12 +127,6 @@ typedef struct xfs_mount {
uint8_t m_blkbit_log; /* blocklog + NBBY */
uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
uint8_t m_agno_log; /* log #ag's */
- uint8_t m_agino_log; /* #bits for agino in inum */
- uint m_inode_cluster_size;/* min inode buf size */
- unsigned int m_inodes_per_cluster;
- unsigned int m_blocks_per_cluster;
- unsigned int m_cluster_align;
- unsigned int m_cluster_align_inodes;
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -139,15 +134,12 @@ typedef struct xfs_mount {
uint m_alloc_mnr[2]; /* min alloc btree records */
uint m_bmap_dmxr[2]; /* max bmap btree records */
uint m_bmap_dmnr[2]; /* min bmap btree records */
- uint m_inobt_mxr[2]; /* max inobt btree records */
- uint m_inobt_mnr[2]; /* min inobt btree records */
uint m_rmap_mxr[2]; /* max rmap btree records */
uint m_rmap_mnr[2]; /* min rmap btree records */
uint m_refc_mxr[2]; /* max refc btree records */
uint m_refc_mnr[2]; /* min refc btree records */
uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
- uint m_in_maxlevels; /* max inobt btree levels. */
uint m_rmap_maxlevels; /* max rmap btree levels */
uint m_refc_maxlevels; /* max refcount btree level */
xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
@@ -159,20 +151,13 @@ typedef struct xfs_mount {
int m_fixedfsid[2]; /* unchanged for life of FS */
uint64_t m_flags; /* global mount flags */
bool m_finobt_nores; /* no per-AG finobt resv. */
- int m_ialloc_inos; /* inodes in inode allocation */
- int m_ialloc_blks; /* blocks in inode allocation */
- int m_ialloc_min_blks;/* min blocks in sparse inode
- * allocation */
- int m_inoalign_mask;/* mask sb_inoalignmt if used */
uint m_qflags; /* quota status flags */
struct xfs_trans_resv m_resv; /* precomputed res values */
- uint64_t m_maxicount; /* maximum inode count */
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
- int m_sinoalign; /* stripe unit inode alignment */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
@@ -198,7 +183,6 @@ typedef struct xfs_mount {
struct workqueue_struct *m_unwritten_workqueue;
struct workqueue_struct *m_cil_workqueue;
struct workqueue_struct *m_reclaim_workqueue;
- struct workqueue_struct *m_log_workqueue;
struct workqueue_struct *m_eofblocks_workqueue;
struct workqueue_struct *m_sync_workqueue;
@@ -226,6 +210,8 @@ typedef struct xfs_mount {
#endif
} xfs_mount_t;
+#define M_IGEO(mp) (&(mp)->m_ino_geo)
+
/*
* Flags for m_flags.
*/
@@ -465,7 +451,7 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
-extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
+extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index c8ba98fae30a..b6701b4f59a9 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0);
XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0);
XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
+
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index bde2c9f56a46..0c954cad7449 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -2,23 +2,16 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
-#include <linux/iomap.h>
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_iomap.h"
-#include "xfs_shared.h"
-#include "xfs_bit.h"
-#include "xfs_pnfs.h"
/*
* Ensure that we do not have any outstanding pNFS layouts that can be used by
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
new file mode 100644
index 000000000000..4bcc3e61056c
--- /dev/null
+++ b/fs/xfs/xfs_pwork.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_trace.h"
+#include "xfs_sysctl.h"
+#include "xfs_pwork.h"
+#include <linux/nmi.h>
+
+/*
+ * Parallel Work Queue
+ * ===================
+ *
+ * Abstract away the details of running a large and "obviously" parallelizable
+ * task across multiple CPUs. Callers initialize the pwork control object with
+ * a desired level of parallelization and a work function. Next, they embed
+ * struct xfs_pwork in whatever structure they use to pass work context to a
+ * worker thread and queue that pwork. The work function will be passed the
+ * pwork item when it is run (from process context) and any returned error will
+ * be recorded in xfs_pwork_ctl.error. Work functions should check for errors
+ * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
+ * stop workqueue item processing.
+ *
+ * This is the rough equivalent of the xfsprogs workqueue code, though we can't
+ * reuse that name here.
+ */
+
+/* Invoke our caller's function. */
+static void
+xfs_pwork_work(
+ struct work_struct *work)
+{
+ struct xfs_pwork *pwork;
+ struct xfs_pwork_ctl *pctl;
+ int error;
+
+ pwork = container_of(work, struct xfs_pwork, work);
+ pctl = pwork->pctl;
+ error = pctl->work_fn(pctl->mp, pwork);
+ if (error && !pctl->error)
+ pctl->error = error;
+ if (atomic_dec_and_test(&pctl->nr_work))
+ wake_up(&pctl->poll_wait);
+}
+
+/*
+ * Set up control data for parallel work. @work_fn is the function that will
+ * be called. @tag will be written into the kernel threads. @nr_threads is
+ * the level of parallelism desired, or 0 for no limit.
+ */
+int
+xfs_pwork_init(
+ struct xfs_mount *mp,
+ struct xfs_pwork_ctl *pctl,
+ xfs_pwork_work_fn work_fn,
+ const char *tag,
+ unsigned int nr_threads)
+{
+#ifdef DEBUG
+ if (xfs_globals.pwork_threads >= 0)
+ nr_threads = xfs_globals.pwork_threads;
+#endif
+ trace_xfs_pwork_init(mp, nr_threads, current->pid);
+
+ pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
+ current->pid);
+ if (!pctl->wq)
+ return -ENOMEM;
+ pctl->work_fn = work_fn;
+ pctl->error = 0;
+ pctl->mp = mp;
+ atomic_set(&pctl->nr_work, 0);
+ init_waitqueue_head(&pctl->poll_wait);
+
+ return 0;
+}
+
+/* Queue some parallel work. */
+void
+xfs_pwork_queue(
+ struct xfs_pwork_ctl *pctl,
+ struct xfs_pwork *pwork)
+{
+ INIT_WORK(&pwork->work, xfs_pwork_work);
+ pwork->pctl = pctl;
+ atomic_inc(&pctl->nr_work);
+ queue_work(pctl->wq, &pwork->work);
+}
+
+/* Wait for the work to finish and tear down the control structure. */
+int
+xfs_pwork_destroy(
+ struct xfs_pwork_ctl *pctl)
+{
+ destroy_workqueue(pctl->wq);
+ pctl->wq = NULL;
+ return pctl->error;
+}
+
+/*
+ * Wait for the work to finish by polling completion status and touch the soft
+ * lockup watchdog. This is for callers such as mount which hold locks.
+ */
+void
+xfs_pwork_poll(
+ struct xfs_pwork_ctl *pctl)
+{
+ while (wait_event_timeout(pctl->poll_wait,
+ atomic_read(&pctl->nr_work) == 0, HZ) == 0)
+ touch_softlockup_watchdog();
+}
+
+/*
+ * Return the amount of parallelism that the data device can handle, or 0 for
+ * no limit.
+ */
+unsigned int
+xfs_pwork_guess_datadev_parallelism(
+ struct xfs_mount *mp)
+{
+ struct xfs_buftarg *btp = mp->m_ddev_targp;
+
+ /*
+ * For now we'll go with the most conservative setting possible,
+ * which is two threads for an SSD and 1 thread everywhere else.
+ */
+ return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
+}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
new file mode 100644
index 000000000000..8133124cf3bb
--- /dev/null
+++ b/fs/xfs/xfs_pwork.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_PWORK_H__
+#define __XFS_PWORK_H__
+
+struct xfs_pwork;
+struct xfs_mount;
+
+typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork);
+
+/*
+ * Parallel work coordination structure.
+ */
+struct xfs_pwork_ctl {
+ struct workqueue_struct *wq;
+ struct xfs_mount *mp;
+ xfs_pwork_work_fn work_fn;
+ struct wait_queue_head poll_wait;
+ atomic_t nr_work;
+ int error;
+};
+
+/*
+ * Embed this parallel work control item inside your own work structure,
+ * then queue work with it.
+ */
+struct xfs_pwork {
+ struct work_struct work;
+ struct xfs_pwork_ctl *pctl;
+};
+
+#define XFS_PWORK_SINGLE_THREADED { .pctl = NULL }
+
+/* Have we been told to abort? */
+static inline bool
+xfs_pwork_ctl_want_abort(
+ struct xfs_pwork_ctl *pctl)
+{
+ return pctl && pctl->error;
+}
+
+/* Have we been told to abort? */
+static inline bool
+xfs_pwork_want_abort(
+ struct xfs_pwork *pwork)
+{
+ return xfs_pwork_ctl_want_abort(pwork->pctl);
+}
+
+int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
+ xfs_pwork_work_fn work_fn, const char *tag,
+ unsigned int nr_threads);
+void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
+int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
+void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
+unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
+
+#endif /* __XFS_PWORK_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index aa6b6db3db0e..5e7a37f0cf84 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -13,19 +13,15 @@
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
+#include "xfs_iwalk.h"
#include "xfs_quota.h"
-#include "xfs_error.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_btree.h"
#include "xfs_bmap_util.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_cksum.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust(
/* ARGSUSED */
STATIC int
xfs_qm_dqusage_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- int *ubused, /* not used */
- int *res) /* result code value */
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ void *data)
{
- xfs_inode_t *ip;
- xfs_qcnt_t nblks;
- xfs_filblks_t rtblks = 0; /* total rt blks */
- int error;
+ struct xfs_inode *ip;
+ xfs_qcnt_t nblks;
+ xfs_filblks_t rtblks = 0; /* total rt blks */
+ int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust(
* rootino must have its resources accounted for, not so with the quota
* inodes.
*/
- if (xfs_is_quota_inode(&mp->m_sb, ino)) {
- *res = BULKSTAT_RV_NOTHING;
- return -EINVAL;
- }
+ if (xfs_is_quota_inode(&mp->m_sb, ino))
+ return 0;
/*
* We don't _need_ to take the ilock EXCL here because quotacheck runs
* at mount time and therefore nobody will be racing chown/chproj.
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip);
- if (error) {
- *res = BULKSTAT_RV_NOTHING;
+ error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
+ if (error == -EINVAL || error == -ENOENT)
+ return 0;
+ if (error)
return error;
- }
ASSERT(ip->i_delayed_blks == 0);
@@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
goto error0;
}
@@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust(
goto error0;
}
- xfs_irele(ip);
- *res = BULKSTAT_RV_DIDONE;
- return 0;
-
error0:
xfs_irele(ip);
- *res = BULKSTAT_RV_GIVEUP;
return error;
}
@@ -1270,18 +1257,13 @@ STATIC int
xfs_qm_quotacheck(
xfs_mount_t *mp)
{
- int done, count, error, error2;
- xfs_ino_t lastino;
- size_t structsz;
+ int error, error2;
uint flags;
LIST_HEAD (buffer_list);
struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip;
struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip;
struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip;
- count = INT_MAX;
- structsz = 1;
- lastino = 0;
flags = 0;
ASSERT(uip || gip || pip);
@@ -1318,18 +1300,10 @@ xfs_qm_quotacheck(
flags |= XFS_PQUOTA_CHKD;
}
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters in core.
- */
- error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust,
- structsz, NULL, &done);
- if (error)
- break;
-
- } while (!done);
+ error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
+ NULL);
+ if (error)
+ goto error_return;
/*
* We've made all the changes that we need to make incore. Flush them
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3091e4bc04ef..5d72e88598b4 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -5,13 +5,13 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index b3190890f096..da7ad0383037 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
-#include <linux/capability.h>
#include "xfs.h"
#include "xfs_fs.h"
@@ -12,17 +11,13 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_qm.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
-#include "xfs_defer.h"
STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index a7c0c657dfaf..cd6c7210a373 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -4,6 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -11,10 +12,8 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
-#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_qm.h"
-#include <linux/quota.h>
static void
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fce38b56b962..d8288aa0670a 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -14,7 +14,6 @@
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
@@ -95,15 +94,6 @@ xfs_cui_item_format(
}
/*
- * Pinning has no meaning for an cui item, so just return.
- */
-STATIC void
-xfs_cui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an CUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the CUI transaction has been successfully committed to make it
@@ -122,71 +112,22 @@ xfs_cui_item_unpin(
}
/*
- * CUI items have no locking or pushing. However, since CUIs are pulled from
- * the AIL when their corresponding CUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the CUI out of
- * the AIL.
- */
-STATIC uint
-xfs_cui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The CUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an CUD isn't going to be
* constructed and thus we free the CUI here directly.
*/
STATIC void
-xfs_cui_item_unlock(
+xfs_cui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_cui_release(CUI_ITEM(lip));
+ xfs_cui_release(CUI_ITEM(lip));
}
-/*
- * The CUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_cui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The CUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all cui log items.
- */
static const struct xfs_item_ops xfs_cui_item_ops = {
.iop_size = xfs_cui_item_size,
.iop_format = xfs_cui_item_format,
- .iop_pin = xfs_cui_item_pin,
.iop_unpin = xfs_cui_item_unpin,
- .iop_unlock = xfs_cui_item_unlock,
- .iop_committed = xfs_cui_item_committed,
- .iop_push = xfs_cui_item_push,
- .iop_committing = xfs_cui_item_committing,
+ .iop_release = xfs_cui_item_release,
};
/*
@@ -254,126 +195,250 @@ xfs_cud_item_format(
}
/*
- * Pinning has no meaning for an cud item, so just return.
+ * The CUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the CUI and free the
+ * CUD.
*/
STATIC void
-xfs_cud_item_pin(
+xfs_cud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+
+ xfs_cui_release(cudp->cud_cuip);
+ kmem_zone_free(xfs_cud_zone, cudp);
}
-/*
- * Since pinning has no meaning for an cud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_cud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_cud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_cud_item_size,
+ .iop_format = xfs_cud_item_format,
+ .iop_release = xfs_cud_item_release,
+};
+
+static struct xfs_cud_log_item *
+xfs_trans_get_cud(
+ struct xfs_trans *tp,
+ struct xfs_cui_log_item *cuip)
{
+ struct xfs_cud_log_item *cudp;
+
+ cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
+ &xfs_cud_item_ops);
+ cudp->cud_cuip = cuip;
+ cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+
+ xfs_trans_add_item(tp, &cudp->cud_item);
+ return cudp;
}
/*
- * There isn't much you can do to push on an cud item. It is simply stuck
- * waiting for the log to be flushed to disk.
+ * Finish an refcount update and log it to the CUD. Note that the
+ * transaction is marked dirty regardless of whether the refcount
+ * update succeeds or fails to support the CUI/CUD lifecycle rules.
*/
-STATIC uint
-xfs_cud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+static int
+xfs_trans_log_finish_refcount_update(
+ struct xfs_trans *tp,
+ struct xfs_cud_log_item *cudp,
+ enum xfs_refcount_intent_type type,
+ xfs_fsblock_t startblock,
+ xfs_extlen_t blockcount,
+ xfs_fsblock_t *new_fsb,
+ xfs_extlen_t *new_len,
+ struct xfs_btree_cur **pcur)
{
- return XFS_ITEM_PINNED;
+ int error;
+
+ error = xfs_refcount_finish_one(tp, type, startblock,
+ blockcount, new_fsb, new_len, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the CUI and frees the CUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
+
+ return error;
}
-/*
- * The CUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the CUI and free the
- * CUD.
- */
-STATIC void
-xfs_cud_item_unlock(
- struct xfs_log_item *lip)
+/* Sort refcount intents by AG. */
+static int
+xfs_refcount_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_refcount_intent *ra;
+ struct xfs_refcount_intent *rb;
+
+ ra = container_of(a, struct xfs_refcount_intent, ri_list);
+ rb = container_of(b, struct xfs_refcount_intent, ri_list);
+ return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
+}
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_cui_release(cudp->cud_cuip);
- kmem_zone_free(xfs_cud_zone, cudp);
+/* Get an CUI. */
+STATIC void *
+xfs_refcount_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_cui_log_item *cuip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ cuip = xfs_cui_init(tp->t_mountp, count);
+ ASSERT(cuip != NULL);
+
+ /*
+ * Get a log_item_desc to point at the new item.
+ */
+ xfs_trans_add_item(tp, &cuip->cui_item);
+ return cuip;
+}
+
+/* Set the phys extent flags for this reverse mapping. */
+static void
+xfs_trans_set_refcount_flags(
+ struct xfs_phys_extent *refc,
+ enum xfs_refcount_intent_type type)
+{
+ refc->pe_flags = 0;
+ switch (type) {
+ case XFS_REFCOUNT_INCREASE:
+ case XFS_REFCOUNT_DECREASE:
+ case XFS_REFCOUNT_ALLOC_COW:
+ case XFS_REFCOUNT_FREE_COW:
+ refc->pe_flags |= type;
+ break;
+ default:
+ ASSERT(0);
}
}
-/*
- * When the cud item is committed to disk, all we need to do is delete our
- * reference to our partner cui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_cud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Log refcount updates in the intent item. */
+STATIC void
+xfs_refcount_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
- struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
+ struct xfs_cui_log_item *cuip = intent;
+ struct xfs_refcount_intent *refc;
+ uint next_extent;
+ struct xfs_phys_extent *ext;
+
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
/*
- * Drop the CUI reference regardless of whether the CUD has been
- * aborted. Once the CUD transaction is constructed, it is the sole
- * responsibility of the CUD to release the CUI (even if the CUI is
- * aborted due to log I/O error).
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
*/
- xfs_cui_release(cudp->cud_cuip);
- kmem_zone_free(xfs_cud_zone, cudp);
+ next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
+ ASSERT(next_extent < cuip->cui_format.cui_nextents);
+ ext = &cuip->cui_format.cui_extents[next_extent];
+ ext->pe_startblock = refc->ri_startblock;
+ ext->pe_len = refc->ri_blockcount;
+ xfs_trans_set_refcount_flags(ext, refc->ri_type);
+}
- return (xfs_lsn_t)-1;
+/* Get an CUD so we can process all the deferred refcount updates. */
+STATIC void *
+xfs_refcount_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_cud(tp, intent);
}
-/*
- * The CUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_cud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Process a deferred refcount update. */
+STATIC int
+xfs_refcount_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
{
+ struct xfs_refcount_intent *refc;
+ xfs_fsblock_t new_fsb;
+ xfs_extlen_t new_aglen;
+ int error;
+
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+ error = xfs_trans_log_finish_refcount_update(tp, done_item,
+ refc->ri_type,
+ refc->ri_startblock,
+ refc->ri_blockcount,
+ &new_fsb, &new_aglen,
+ (struct xfs_btree_cur **)state);
+ /* Did we run out of reservation? Requeue what we didn't finish. */
+ if (!error && new_aglen > 0) {
+ ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
+ refc->ri_type == XFS_REFCOUNT_DECREASE);
+ refc->ri_startblock = new_fsb;
+ refc->ri_blockcount = new_aglen;
+ return -EAGAIN;
+ }
+ kmem_free(refc);
+ return error;
}
-/*
- * This is the ops vector shared by all cud log items.
- */
-static const struct xfs_item_ops xfs_cud_item_ops = {
- .iop_size = xfs_cud_item_size,
- .iop_format = xfs_cud_item_format,
- .iop_pin = xfs_cud_item_pin,
- .iop_unpin = xfs_cud_item_unpin,
- .iop_unlock = xfs_cud_item_unlock,
- .iop_committed = xfs_cud_item_committed,
- .iop_push = xfs_cud_item_push,
- .iop_committing = xfs_cud_item_committing,
-};
+/* Clean up after processing deferred refcounts. */
+STATIC void
+xfs_refcount_update_finish_cleanup(
+ struct xfs_trans *tp,
+ void *state,
+ int error)
+{
+ struct xfs_btree_cur *rcur = state;
-/*
- * Allocate and initialize an cud item with the given number of extents.
- */
-struct xfs_cud_log_item *
-xfs_cud_init(
- struct xfs_mount *mp,
- struct xfs_cui_log_item *cuip)
+ xfs_refcount_finish_one_cleanup(tp, rcur, error);
+}
+/* Abort all pending CUIs. */
+STATIC void
+xfs_refcount_update_abort_intent(
+ void *intent)
{
- struct xfs_cud_log_item *cudp;
+ xfs_cui_release(intent);
+}
- cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops);
- cudp->cud_cuip = cuip;
- cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
+/* Cancel a deferred refcount update. */
+STATIC void
+xfs_refcount_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_refcount_intent *refc;
- return cudp;
+ refc = container_of(item, struct xfs_refcount_intent, ri_list);
+ kmem_free(refc);
}
+const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
+ .max_items = XFS_CUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_refcount_update_diff_items,
+ .create_intent = xfs_refcount_update_create_intent,
+ .abort_intent = xfs_refcount_update_abort_intent,
+ .log_item = xfs_refcount_update_log_item,
+ .create_done = xfs_refcount_update_create_done,
+ .finish_item = xfs_refcount_update_finish_item,
+ .finish_cleanup = xfs_refcount_update_finish_cleanup,
+ .cancel_item = xfs_refcount_update_cancel_item,
+};
+
/*
* Process a refcount update intent item that was recovered from the log.
* We need to update the refcountbt.
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 3896dcc2368f..e47530f30489 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_cui_zone;
extern struct kmem_zone *xfs_cud_zone;
struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
-struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
- struct xfs_cui_log_item *);
void xfs_cui_item_free(struct xfs_cui_log_item *);
void xfs_cui_release(struct xfs_cui_log_item *);
int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 680ae7662a78..c4ec7afd1170 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -11,21 +11,12 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
-#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
-#include "xfs_error.h"
-#include "xfs_dir2.h"
-#include "xfs_dir2_priv.h"
-#include "xfs_ioctl.h"
#include "xfs_trace.h"
-#include "xfs_log.h"
#include "xfs_icache.h"
-#include "xfs_pnfs.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_refcount.h"
@@ -33,11 +24,9 @@
#include "xfs_trans_space.h"
#include "xfs_bit.h"
#include "xfs_alloc.h"
-#include "xfs_quota_defs.h"
#include "xfs_quota.h"
#include "xfs_reflink.h"
#include "xfs_iomap.h"
-#include "xfs_rmap_btree.h"
#include "xfs_sb.h"
#include "xfs_ag_resv.h"
@@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range(
/* Start a rolling transaction to remove the mappings */
error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
- 0, 0, XFS_TRANS_NOFS, &tp);
+ 0, 0, 0, &tp);
if (error)
goto out;
@@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent(
resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
- XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp);
+ XFS_TRANS_RESERVE, &tp);
if (error)
return error;
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 127dc9c32a54..77ed557b6127 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -14,7 +14,6 @@
#include "xfs_defer.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
-#include "xfs_buf_item.h"
#include "xfs_rmap_item.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
@@ -94,15 +93,6 @@ xfs_rui_item_format(
}
/*
- * Pinning has no meaning for an rui item, so just return.
- */
-STATIC void
-xfs_rui_item_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
* The unpin operation is the last place an RUI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
* either case, the RUI transaction has been successfully committed to make it
@@ -121,71 +111,22 @@ xfs_rui_item_unpin(
}
/*
- * RUI items have no locking or pushing. However, since RUIs are pulled from
- * the AIL when their corresponding RUDs are committed to disk, their situation
- * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
- * will eventually flush the log. This should help in getting the RUI out of
- * the AIL.
- */
-STATIC uint
-xfs_rui_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
-{
- return XFS_ITEM_PINNED;
-}
-
-/*
* The RUI has been either committed or aborted if the transaction has been
* cancelled. If the transaction was cancelled, an RUD isn't going to be
* constructed and thus we free the RUI here directly.
*/
STATIC void
-xfs_rui_item_unlock(
+xfs_rui_item_release(
struct xfs_log_item *lip)
{
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
- xfs_rui_release(RUI_ITEM(lip));
+ xfs_rui_release(RUI_ITEM(lip));
}
-/*
- * The RUI is logged only once and cannot be moved in the log, so simply return
- * the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_rui_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * The RUI dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
-STATIC void
-xfs_rui_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector shared by all rui log items.
- */
static const struct xfs_item_ops xfs_rui_item_ops = {
.iop_size = xfs_rui_item_size,
.iop_format = xfs_rui_item_format,
- .iop_pin = xfs_rui_item_pin,
.iop_unpin = xfs_rui_item_unpin,
- .iop_unlock = xfs_rui_item_unlock,
- .iop_committed = xfs_rui_item_committed,
- .iop_push = xfs_rui_item_push,
- .iop_committing = xfs_rui_item_committing,
+ .iop_release = xfs_rui_item_release,
};
/*
@@ -275,126 +216,271 @@ xfs_rud_item_format(
}
/*
- * Pinning has no meaning for an rud item, so just return.
+ * The RUD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the RUI and free the
+ * RUD.
*/
STATIC void
-xfs_rud_item_pin(
+xfs_rud_item_release(
struct xfs_log_item *lip)
{
+ struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+
+ xfs_rui_release(rudp->rud_ruip);
+ kmem_zone_free(xfs_rud_zone, rudp);
}
-/*
- * Since pinning has no meaning for an rud item, unpinning does
- * not either.
- */
-STATIC void
-xfs_rud_item_unpin(
- struct xfs_log_item *lip,
- int remove)
+static const struct xfs_item_ops xfs_rud_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_rud_item_size,
+ .iop_format = xfs_rud_item_format,
+ .iop_release = xfs_rud_item_release,
+};
+
+static struct xfs_rud_log_item *
+xfs_trans_get_rud(
+ struct xfs_trans *tp,
+ struct xfs_rui_log_item *ruip)
{
+ struct xfs_rud_log_item *rudp;
+
+ rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+ xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
+ &xfs_rud_item_ops);
+ rudp->rud_ruip = ruip;
+ rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+
+ xfs_trans_add_item(tp, &rudp->rud_item);
+ return rudp;
}
-/*
- * There isn't much you can do to push on an rud item. It is simply stuck
- * waiting for the log to be flushed to disk.
- */
-STATIC uint
-xfs_rud_item_push(
- struct xfs_log_item *lip,
- struct list_head *buffer_list)
+/* Set the map extent flags for this reverse mapping. */
+static void
+xfs_trans_set_rmap_flags(
+ struct xfs_map_extent *rmap,
+ enum xfs_rmap_intent_type type,
+ int whichfork,
+ xfs_exntst_t state)
{
- return XFS_ITEM_PINNED;
+ rmap->me_flags = 0;
+ if (state == XFS_EXT_UNWRITTEN)
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
+ if (whichfork == XFS_ATTR_FORK)
+ rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
+ switch (type) {
+ case XFS_RMAP_MAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
+ break;
+ case XFS_RMAP_MAP_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
+ break;
+ case XFS_RMAP_UNMAP:
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
+ break;
+ case XFS_RMAP_UNMAP_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
+ break;
+ case XFS_RMAP_CONVERT:
+ rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
+ break;
+ case XFS_RMAP_CONVERT_SHARED:
+ rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
+ break;
+ case XFS_RMAP_ALLOC:
+ rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
+ break;
+ case XFS_RMAP_FREE:
+ rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
+ break;
+ default:
+ ASSERT(0);
+ }
}
/*
- * The RUD is either committed or aborted if the transaction is cancelled. If
- * the transaction is cancelled, drop our reference to the RUI and free the
- * RUD.
+ * Finish an rmap update and log it to the RUD. Note that the transaction is
+ * marked dirty regardless of whether the rmap update succeeds or fails to
+ * support the RUI/RUD lifecycle rules.
*/
-STATIC void
-xfs_rud_item_unlock(
- struct xfs_log_item *lip)
+static int
+xfs_trans_log_finish_rmap_update(
+ struct xfs_trans *tp,
+ struct xfs_rud_log_item *rudp,
+ enum xfs_rmap_intent_type type,
+ uint64_t owner,
+ int whichfork,
+ xfs_fileoff_t startoff,
+ xfs_fsblock_t startblock,
+ xfs_filblks_t blockcount,
+ xfs_exntst_t state,
+ struct xfs_btree_cur **pcur)
{
- struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+ int error;
- if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) {
- xfs_rui_release(rudp->rud_ruip);
- kmem_zone_free(xfs_rud_zone, rudp);
- }
+ error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
+ startblock, blockcount, state, pcur);
+
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the RUI and frees the RUD
+ * 2.) shuts down the filesystem
+ */
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
+
+ return error;
}
-/*
- * When the rud item is committed to disk, all we need to do is delete our
- * reference to our partner rui item and then free ourselves. Since we're
- * freeing ourselves we must return -1 to keep the transaction code from
- * further referencing this item.
- */
-STATIC xfs_lsn_t
-xfs_rud_item_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+/* Sort rmap intents by AG. */
+static int
+xfs_rmap_update_diff_items(
+ void *priv,
+ struct list_head *a,
+ struct list_head *b)
{
- struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
+ struct xfs_mount *mp = priv;
+ struct xfs_rmap_intent *ra;
+ struct xfs_rmap_intent *rb;
+
+ ra = container_of(a, struct xfs_rmap_intent, ri_list);
+ rb = container_of(b, struct xfs_rmap_intent, ri_list);
+ return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
+ XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
+}
+
+/* Get an RUI. */
+STATIC void *
+xfs_rmap_update_create_intent(
+ struct xfs_trans *tp,
+ unsigned int count)
+{
+ struct xfs_rui_log_item *ruip;
+
+ ASSERT(tp != NULL);
+ ASSERT(count > 0);
+
+ ruip = xfs_rui_init(tp->t_mountp, count);
+ ASSERT(ruip != NULL);
/*
- * Drop the RUI reference regardless of whether the RUD has been
- * aborted. Once the RUD transaction is constructed, it is the sole
- * responsibility of the RUD to release the RUI (even if the RUI is
- * aborted due to log I/O error).
+ * Get a log_item_desc to point at the new item.
*/
- xfs_rui_release(rudp->rud_ruip);
- kmem_zone_free(xfs_rud_zone, rudp);
-
- return (xfs_lsn_t)-1;
+ xfs_trans_add_item(tp, &ruip->rui_item);
+ return ruip;
}
-/*
- * The RUD dependency tracking op doesn't do squat. It can't because
- * it doesn't know where the free extent is coming from. The dependency
- * tracking has to be handled by the "enclosing" metadata object. For
- * example, for inodes, the inode is locked throughout the extent freeing
- * so the dependency should be recorded there.
- */
+/* Log rmap updates in the intent item. */
STATIC void
-xfs_rud_item_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
+xfs_rmap_update_log_item(
+ struct xfs_trans *tp,
+ void *intent,
+ struct list_head *item)
{
+ struct xfs_rui_log_item *ruip = intent;
+ struct xfs_rmap_intent *rmap;
+ uint next_extent;
+ struct xfs_map_extent *map;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+
+ tp->t_flags |= XFS_TRANS_DIRTY;
+ set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
+
+ /*
+ * atomic_inc_return gives us the value after the increment;
+ * we want to use it as an array index so we need to subtract 1 from
+ * it.
+ */
+ next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
+ ASSERT(next_extent < ruip->rui_format.rui_nextents);
+ map = &ruip->rui_format.rui_extents[next_extent];
+ map->me_owner = rmap->ri_owner;
+ map->me_startblock = rmap->ri_bmap.br_startblock;
+ map->me_startoff = rmap->ri_bmap.br_startoff;
+ map->me_len = rmap->ri_bmap.br_blockcount;
+ xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
+ rmap->ri_bmap.br_state);
}
-/*
- * This is the ops vector shared by all rud log items.
- */
-static const struct xfs_item_ops xfs_rud_item_ops = {
- .iop_size = xfs_rud_item_size,
- .iop_format = xfs_rud_item_format,
- .iop_pin = xfs_rud_item_pin,
- .iop_unpin = xfs_rud_item_unpin,
- .iop_unlock = xfs_rud_item_unlock,
- .iop_committed = xfs_rud_item_committed,
- .iop_push = xfs_rud_item_push,
- .iop_committing = xfs_rud_item_committing,
-};
+/* Get an RUD so we can process all the deferred rmap updates. */
+STATIC void *
+xfs_rmap_update_create_done(
+ struct xfs_trans *tp,
+ void *intent,
+ unsigned int count)
+{
+ return xfs_trans_get_rud(tp, intent);
+}
-/*
- * Allocate and initialize an rud item with the given number of extents.
- */
-struct xfs_rud_log_item *
-xfs_rud_init(
- struct xfs_mount *mp,
- struct xfs_rui_log_item *ruip)
+/* Process a deferred rmap update. */
+STATIC int
+xfs_rmap_update_finish_item(
+ struct xfs_trans *tp,
+ struct list_head *item,
+ void *done_item,
+ void **state)
+{
+ struct xfs_rmap_intent *rmap;
+ int error;
+
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ error = xfs_trans_log_finish_rmap_update(tp, done_item,
+ rmap->ri_type,
+ rmap->ri_owner, rmap->ri_whichfork,
+ rmap->ri_bmap.br_startoff,
+ rmap->ri_bmap.br_startblock,
+ rmap->ri_bmap.br_blockcount,
+ rmap->ri_bmap.br_state,
+ (struct xfs_btree_cur **)state);
+ kmem_free(rmap);
+ return error;
+}
+
+/* Clean up after processing deferred rmaps. */
+STATIC void
+xfs_rmap_update_finish_cleanup(
+ struct xfs_trans *tp,
+ void *state,
+ int error)
+{
+ struct xfs_btree_cur *rcur = state;
+
+ xfs_rmap_finish_one_cleanup(tp, rcur, error);
+}
+/* Abort all pending RUIs. */
+STATIC void
+xfs_rmap_update_abort_intent(
+ void *intent)
{
- struct xfs_rud_log_item *rudp;
+ xfs_rui_release(intent);
+}
- rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
- xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops);
- rudp->rud_ruip = ruip;
- rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
+/* Cancel a deferred rmap update. */
+STATIC void
+xfs_rmap_update_cancel_item(
+ struct list_head *item)
+{
+ struct xfs_rmap_intent *rmap;
- return rudp;
+ rmap = container_of(item, struct xfs_rmap_intent, ri_list);
+ kmem_free(rmap);
}
+const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
+ .max_items = XFS_RUI_MAX_FAST_EXTENTS,
+ .diff_items = xfs_rmap_update_diff_items,
+ .create_intent = xfs_rmap_update_create_intent,
+ .abort_intent = xfs_rmap_update_abort_intent,
+ .log_item = xfs_rmap_update_log_item,
+ .create_done = xfs_rmap_update_create_done,
+ .finish_item = xfs_rmap_update_finish_item,
+ .finish_cleanup = xfs_rmap_update_finish_cleanup,
+ .cancel_item = xfs_rmap_update_cancel_item,
+};
+
/*
* Process an rmap update intent item that was recovered from the log.
* We need to update the rmapbt.
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 7e482baa27f5..8708e4a5aa5c 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_rui_zone;
extern struct kmem_zone *xfs_rud_zone;
struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
-struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
- struct xfs_rui_log_item *);
int xfs_rui_copy_format(struct xfs_log_iovec *buf,
struct xfs_rui_log_format *dst_rui_fmt);
void xfs_rui_item_free(struct xfs_rui_log_item *);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ac0fcdad0c4e..5fa4db3c3e32 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -11,17 +11,11 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
-#include "xfs_trace.h"
-#include "xfs_buf.h"
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index cc509743facd..113883c4f202 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -4,7 +4,6 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/proc_fs.h>
struct xstats xfsstats;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a14d11d78bd8..f9450235533c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -11,18 +11,15 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
-#include "xfs_error.h"
#include "xfs_fsops.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_extfree_item.h"
#include "xfs_mru_cache.h"
@@ -38,18 +35,8 @@
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
-#include "xfs_defer.h"
-#include <linux/namei.h>
-#include <linux/dax.h>
-#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/magic.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/parser.h>
static const struct super_operations xfs_super_operations;
@@ -582,7 +569,7 @@ xfs_set_inode_alloc(
* Calculate how much should be reserved for inodes to meet
* the max inode percentage. Used only for inode32.
*/
- if (mp->m_maxicount) {
+ if (M_IGEO(mp)->maxicount) {
uint64_t icount;
icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -840,16 +827,10 @@ xfs_init_mount_workqueues(
if (!mp->m_reclaim_workqueue)
goto out_destroy_cil;
- mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
- mp->m_fsname);
- if (!mp->m_log_workqueue)
- goto out_destroy_reclaim;
-
mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
if (!mp->m_eofblocks_workqueue)
- goto out_destroy_log;
+ goto out_destroy_reclaim;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
mp->m_fsname);
@@ -860,8 +841,6 @@ xfs_init_mount_workqueues(
out_destroy_eofb:
destroy_workqueue(mp->m_eofblocks_workqueue);
-out_destroy_log:
- destroy_workqueue(mp->m_log_workqueue);
out_destroy_reclaim:
destroy_workqueue(mp->m_reclaim_workqueue);
out_destroy_cil:
@@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues(
{
destroy_workqueue(mp->m_sync_workqueue);
destroy_workqueue(mp->m_eofblocks_workqueue);
- destroy_workqueue(mp->m_log_workqueue);
destroy_workqueue(mp->m_reclaim_workqueue);
destroy_workqueue(mp->m_cil_workqueue);
destroy_workqueue(mp->m_unwritten_workqueue);
@@ -1131,10 +1109,10 @@ xfs_fs_statfs(
fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
- if (mp->m_maxicount)
+ if (M_IGEO(mp)->maxicount)
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
- mp->m_maxicount);
+ M_IGEO(mp)->maxicount);
/* If sb_icount overshot maxicount, report actual allocation */
statp->f_files = max_t(typeof(statp->f_files),
@@ -1685,6 +1663,8 @@ xfs_fs_fill_super(
sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
+ sb->s_iflags |= SB_I_CGROUPWB;
+
set_posix_acl_flag(sb);
/* version 5 superblocks support inode version counters. */
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 21cb49a43d7c..763e43d22dee 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -38,6 +38,18 @@ extern void xfs_qm_exit(void);
# define XFS_SCRUB_STRING
#endif
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+# define XFS_REPAIR_STRING "repair, "
+#else
+# define XFS_REPAIR_STRING
+#endif
+
+#ifdef CONFIG_XFS_WARN
+# define XFS_WARN_STRING "verbose warnings, "
+#else
+# define XFS_WARN_STRING
+#endif
+
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -49,6 +61,8 @@ extern void xfs_qm_exit(void);
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
XFS_SCRUB_STRING \
+ XFS_REPAIR_STRING \
+ XFS_WARN_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index b2c1177c717f..ed66fd2de327 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -12,23 +12,14 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_defer.h"
#include "xfs_dir2.h"
#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
-#include "xfs_bmap_util.h"
-#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
-#include "xfs_symlink.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
/* ----- Kernel only functions below ----- */
int
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 0cc034dfb786..31b3bdbd2eba 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -4,10 +4,7 @@
* All Rights Reserved.
*/
#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
#include "xfs_error.h"
-#include "xfs_stats.h"
static struct ctl_table_header *xfs_table_header;
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index ad7f9be13087..8abf4640f1d5 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -82,6 +82,9 @@ enum {
extern xfs_param_t xfs_params;
struct xfs_globals {
+#ifdef DEBUG
+ int pwork_threads; /* parallel workqueue threads */
+#endif
int log_recovery_delay; /* log recovery delay (secs) */
int mount_delay; /* mount setup delay (secs) */
bool bug_on_assert; /* BUG() the kernel on assert failure */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index cabda13f3c64..ddd0bf7a4740 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,9 +10,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sysfs.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_stats.h"
#include "xfs_mount.h"
struct xfs_sysfs_attr {
@@ -206,11 +204,51 @@ always_cow_show(
}
XFS_SYSFS_ATTR_RW(always_cow);
+#ifdef DEBUG
+/*
+ * Override how many threads the parallel work queue is allowed to create.
+ * This has to be a debug-only global (instead of an errortag) because one of
+ * the main users of parallel workqueues is mount time quotacheck.
+ */
+STATIC ssize_t
+pwork_threads_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val < -1 || val > num_possible_cpus())
+ return -EINVAL;
+
+ xfs_globals.pwork_threads = val;
+
+ return count;
+}
+
+STATIC ssize_t
+pwork_threads_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
+}
+XFS_SYSFS_ATTR_RW(pwork_threads);
+#endif /* DEBUG */
+
static struct attribute *xfs_dbg_attrs[] = {
ATTR_LIST(bug_on_assert),
ATTR_LIST(log_recovery_delay),
ATTR_LIST(mount_delay),
ATTR_LIST(always_cow),
+#ifdef DEBUG
+ ATTR_LIST(pwork_threads),
+#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index cb6489c22cad..bc85b89f88ca 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -15,24 +15,16 @@
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_da_btree.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
#include "xfs_trans.h"
-#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap_btree.h"
#include "xfs_filestream.h"
#include "xfs_fsmap.h"
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2464ea351f83..8094b1920eef 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
+DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
@@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup);
DEFINE_TRANS_EVENT(xfs_trans_free);
DEFINE_TRANS_EVENT(xfs_trans_roll);
DEFINE_TRANS_EVENT(xfs_trans_add_item);
+DEFINE_TRANS_EVENT(xfs_trans_commit_items);
DEFINE_TRANS_EVENT(xfs_trans_free_items);
TRACE_EVENT(xfs_iunlink_update_bucket,
@@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name, \
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
+TRACE_EVENT(xfs_iwalk_ag,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ xfs_agino_t startino),
+ TP_ARGS(mp, agno, startino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = startino;
+ ),
+ TP_printk("dev %d:%d agno %d startino %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->startino)
+)
+
+TRACE_EVENT(xfs_iwalk_ag_rec,
+ TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ struct xfs_inobt_rec_incore *irec),
+ TP_ARGS(mp, agno, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agino_t, startino)
+ __field(uint64_t, freemask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->agno = agno;
+ __entry->startino = irec->ir_startino;
+ __entry->freemask = irec->ir_free;
+ ),
+ TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
+ __entry->startino, __entry->freemask)
+)
+
+TRACE_EVENT(xfs_pwork_init,
+ TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
+ TP_ARGS(mp, nr_threads, pid),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, nr_threads)
+ __field(pid_t, pid)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->nr_threads = nr_threads;
+ __entry->pid = pid;
+ ),
+ TP_printk("dev %d:%d nr_threads %u pid %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_threads, __entry->pid)
+)
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 912b42f5fe4a..d42a68d8313b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -11,7 +11,6 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_extent_busy.h"
#include "xfs_quota.h"
#include "xfs_trans.h"
@@ -264,9 +263,7 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
- tp = kmem_zone_zalloc(xfs_trans_zone,
- (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
-
+ tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
@@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas(
xfs_buf_t *bp;
int whole = 0;
- bp = xfs_trans_getsb(tp, tp->t_mountp, 0);
+ bp = xfs_trans_getsb(tp, tp->t_mountp);
sbp = XFS_BUF_TO_SBP(bp);
/*
@@ -767,10 +764,9 @@ xfs_trans_del_item(
}
/* Detach and unlock all of the items in a transaction */
-void
+static void
xfs_trans_free_items(
struct xfs_trans *tp,
- xfs_lsn_t commit_lsn,
bool abort)
{
struct xfs_log_item *lip, *next;
@@ -779,11 +775,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
- if (commit_lsn != NULLCOMMITLSN)
- lip->li_ops->iop_committing(lip, commit_lsn);
if (abort)
set_bit(XFS_LI_ABORTED, &lip->li_flags);
- lip->li_ops->iop_unlock(lip);
+ if (lip->li_ops->iop_release)
+ lip->li_ops->iop_release(lip);
}
}
@@ -804,7 +799,8 @@ xfs_log_item_batch_insert(
for (i = 0; i < nr_items; i++) {
struct xfs_log_item *lip = log_items[i];
- lip->li_ops->iop_unpin(lip, 0);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
}
}
@@ -815,7 +811,7 @@ xfs_log_item_batch_insert(
*
* If we are called with the aborted flag set, it is because a log write during
* a CIL checkpoint commit has failed. In this case, all the items in the
- * checkpoint have already gone through iop_commited and iop_unlock, which
+ * checkpoint have already gone through iop_committed and iop_committing, which
* means that checkpoint commit abort handling is treated exactly the same
* as an iclog write error even though we haven't started any IO yet. Hence in
* this case all we need to do is iop_committed processing, followed by an
@@ -833,7 +829,7 @@ xfs_trans_committed_bulk(
struct xfs_ail *ailp,
struct xfs_log_vec *log_vector,
xfs_lsn_t commit_lsn,
- int aborted)
+ bool aborted)
{
#define LOG_ITEM_BATCH_SIZE 32
struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
@@ -852,7 +848,16 @@ xfs_trans_committed_bulk(
if (aborted)
set_bit(XFS_LI_ABORTED, &lip->li_flags);
- item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+
+ if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
+ lip->li_ops->iop_release(lip);
+ continue;
+ }
+
+ if (lip->li_ops->iop_committed)
+ item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
+ else
+ item_lsn = commit_lsn;
/* item_lsn of -1 means the item needs no further processing */
if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -864,7 +869,8 @@ xfs_trans_committed_bulk(
*/
if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
- lip->li_ops->iop_unpin(lip, 1);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 1);
continue;
}
@@ -882,7 +888,8 @@ xfs_trans_committed_bulk(
xfs_trans_ail_update(ailp, lip, item_lsn);
else
spin_unlock(&ailp->ail_lock);
- lip->li_ops->iop_unpin(lip, 0);
+ if (lip->li_ops->iop_unpin)
+ lip->li_ops->iop_unpin(lip, 0);
continue;
}
@@ -998,7 +1005,7 @@ out_unreserve:
tp->t_ticket = NULL;
}
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
+ xfs_trans_free_items(tp, !!error);
xfs_trans_free(tp);
XFS_STATS_INC(mp, xs_trans_empty);
@@ -1060,7 +1067,7 @@ xfs_trans_cancel(
/* mark this thread as no longer being in a transaction */
current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
- xfs_trans_free_items(tp, NULLCOMMITLSN, dirty);
+ xfs_trans_free_items(tp, dirty);
xfs_trans_free(tp);
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6e1c5704a8c..64d7f171ebd3 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -27,7 +27,7 @@ struct xfs_cud_log_item;
struct xfs_bui_log_item;
struct xfs_bud_log_item;
-typedef struct xfs_log_item {
+struct xfs_log_item {
struct list_head li_ail; /* AIL pointers */
struct list_head li_trans; /* transaction list */
xfs_lsn_t li_lsn; /* last on-disk lsn */
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
struct xfs_log_vec *li_lv; /* active log vector */
struct xfs_log_vec *li_lv_shadow; /* standby vector */
xfs_lsn_t li_seq; /* CIL commit seq */
-} xfs_log_item_t;
+};
/*
* li_flags use the (set/test/clear)_bit atomic interfaces because updates can
@@ -67,17 +67,24 @@ typedef struct xfs_log_item {
{ (1 << XFS_LI_DIRTY), "DIRTY" }
struct xfs_item_ops {
- void (*iop_size)(xfs_log_item_t *, int *, int *);
- void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);
- void (*iop_pin)(xfs_log_item_t *);
- void (*iop_unpin)(xfs_log_item_t *, int remove);
+ unsigned flags;
+ void (*iop_size)(struct xfs_log_item *, int *, int *);
+ void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
+ void (*iop_pin)(struct xfs_log_item *);
+ void (*iop_unpin)(struct xfs_log_item *, int remove);
uint (*iop_push)(struct xfs_log_item *, struct list_head *);
- void (*iop_unlock)(xfs_log_item_t *);
- xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
- void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
- void (*iop_error)(xfs_log_item_t *, xfs_buf_t *);
+ void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
+ void (*iop_release)(struct xfs_log_item *);
+ xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
+ void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
};
+/*
+ * Release the log item as soon as committed. This is for items just logging
+ * intents that never need to be written back in place.
+ */
+#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0)
+
void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
int type, const struct xfs_item_ops *ops);
@@ -203,7 +210,7 @@ xfs_trans_read_buf(
flags, bpp, ops);
}
-struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
+struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
@@ -223,14 +230,6 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
bool xfs_trans_buf_is_dirty(struct xfs_buf *bp);
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
-struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
- struct xfs_efi_log_item *,
- uint);
-int xfs_trans_free_extent(struct xfs_trans *,
- struct xfs_efd_log_item *, xfs_fsblock_t,
- xfs_extlen_t,
- const struct xfs_owner_info *,
- bool);
int xfs_trans_commit(struct xfs_trans *);
int xfs_trans_roll(struct xfs_trans **);
int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -245,37 +244,4 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
extern kmem_zone_t *xfs_trans_zone;
-/* rmap updates */
-enum xfs_rmap_intent_type;
-
-struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
- struct xfs_rui_log_item *ruip);
-int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
- struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
- uint64_t owner, int whichfork, xfs_fileoff_t startoff,
- xfs_fsblock_t startblock, xfs_filblks_t blockcount,
- xfs_exntst_t state, struct xfs_btree_cur **pcur);
-
-/* refcount updates */
-enum xfs_refcount_intent_type;
-
-struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
- struct xfs_cui_log_item *cuip);
-int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
- struct xfs_cud_log_item *cudp,
- enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
- xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
-
-/* mapping updates */
-enum xfs_bmap_intent_type;
-
-struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
- struct xfs_bui_log_item *buip);
-int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
- struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
- struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
- xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
- xfs_exntst_t state);
-
#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d3a4e89bf4a0..6ccfd75d3c24 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -6,6 +6,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
@@ -74,29 +75,29 @@ xfs_ail_check(
* Return a pointer to the last item in the AIL. If the AIL is empty, then
* return NULL.
*/
-static xfs_log_item_t *
+static struct xfs_log_item *
xfs_ail_max(
struct xfs_ail *ailp)
{
if (list_empty(&ailp->ail_head))
return NULL;
- return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail);
+ return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
}
/*
* Return a pointer to the item which follows the given item in the AIL. If
* the given item is the last item in the list, then return NULL.
*/
-static xfs_log_item_t *
+static struct xfs_log_item *
xfs_ail_next(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip)
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
if (lip->li_ail.next == &ailp->ail_head)
return NULL;
- return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail);
+ return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
}
/*
@@ -109,10 +110,10 @@ xfs_ail_next(
*/
xfs_lsn_t
xfs_ail_min_lsn(
- struct xfs_ail *ailp)
+ struct xfs_ail *ailp)
{
- xfs_lsn_t lsn = 0;
- xfs_log_item_t *lip;
+ xfs_lsn_t lsn = 0;
+ struct xfs_log_item *lip;
spin_lock(&ailp->ail_lock);
lip = xfs_ail_min(ailp);
@@ -128,10 +129,10 @@ xfs_ail_min_lsn(
*/
static xfs_lsn_t
xfs_ail_max_lsn(
- struct xfs_ail *ailp)
+ struct xfs_ail *ailp)
{
- xfs_lsn_t lsn = 0;
- xfs_log_item_t *lip;
+ xfs_lsn_t lsn = 0;
+ struct xfs_log_item *lip;
spin_lock(&ailp->ail_lock);
lip = xfs_ail_max(ailp);
@@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear(
* ascending traversal. Pass a @lsn of zero to initialise the cursor to the
* first item in the AIL. Returns NULL if the list is empty.
*/
-xfs_log_item_t *
+struct xfs_log_item *
xfs_trans_ail_cursor_first(
struct xfs_ail *ailp,
struct xfs_ail_cursor *cur,
xfs_lsn_t lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
xfs_trans_ail_cursor_init(ailp, cur);
@@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last(
struct xfs_ail *ailp,
xfs_lsn_t lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,8 +328,8 @@ xfs_ail_splice(
*/
static void
xfs_ail_delete(
- struct xfs_ail *ailp,
- xfs_log_item_t *lip)
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
xfs_ail_check(ailp, lip);
list_del(&lip->li_ail);
@@ -347,6 +348,14 @@ xfsaild_push_item(
if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
+ /*
+ * Consider the item pinned if a push callback is not defined so the
+ * caller will force the log. This should only happen for intent items
+ * as they are unpinned once the associated done item is committed to
+ * the on-disk log.
+ */
+ if (!lip->li_ops->iop_push)
+ return XFS_ITEM_PINNED;
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
@@ -356,7 +365,7 @@ xfsaild_push(
{
xfs_mount_t *mp = ailp->ail_mount;
struct xfs_ail_cursor cur;
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
xfs_lsn_t lsn;
xfs_lsn_t target;
long tout;
@@ -611,10 +620,10 @@ xfsaild(
*/
void
xfs_ail_push(
- struct xfs_ail *ailp,
- xfs_lsn_t threshold_lsn)
+ struct xfs_ail *ailp,
+ xfs_lsn_t threshold_lsn)
{
- xfs_log_item_t *lip;
+ struct xfs_log_item *lip;
lip = xfs_ail_min(ailp);
if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk(
int nr_items,
xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
- xfs_log_item_t *mlip;
+ struct xfs_log_item *mlip;
int mlip_changed = 0;
int i;
LIST_HEAD(tmp);
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
deleted file mode 100644
index e1c7d55b32c3..000000000000
--- a/fs/xfs/xfs_trans_bmap.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_bmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_inode.h"
-
-/*
- * This routine is called to allocate a "bmap update done"
- * log item.
- */
-struct xfs_bud_log_item *
-xfs_trans_get_bud(
- struct xfs_trans *tp,
- struct xfs_bui_log_item *buip)
-{
- struct xfs_bud_log_item *budp;
-
- budp = xfs_bud_init(tp->t_mountp, buip);
- xfs_trans_add_item(tp, &budp->bud_item);
- return budp;
-}
-
-/*
- * Finish an bmap update and log it to the BUD. Note that the
- * transaction is marked dirty regardless of whether the bmap update
- * succeeds or fails to support the BUI/BUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_bmap_update(
- struct xfs_trans *tp,
- struct xfs_bud_log_item *budp,
- enum xfs_bmap_intent_type type,
- struct xfs_inode *ip,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t *blockcount,
- xfs_exntst_t state)
-{
- int error;
-
- error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
- startblock, blockcount, state);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the BUI and frees the BUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
-
- return error;
-}
-
-/* Sort bmap intents by inode. */
-static int
-xfs_bmap_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_bmap_intent *ba;
- struct xfs_bmap_intent *bb;
-
- ba = container_of(a, struct xfs_bmap_intent, bi_list);
- bb = container_of(b, struct xfs_bmap_intent, bi_list);
- return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
-}
-
-/* Get an BUI. */
-STATIC void *
-xfs_bmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_bui_log_item *buip;
-
- ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
- ASSERT(tp != NULL);
-
- buip = xfs_bui_init(tp->t_mountp);
- ASSERT(buip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &buip->bui_item);
- return buip;
-}
-
-/* Set the map extent flags for this mapping. */
-static void
-xfs_trans_set_bmap_flags(
- struct xfs_map_extent *bmap,
- enum xfs_bmap_intent_type type,
- int whichfork,
- xfs_exntst_t state)
-{
- bmap->me_flags = 0;
- switch (type) {
- case XFS_BMAP_MAP:
- case XFS_BMAP_UNMAP:
- bmap->me_flags = type;
- break;
- default:
- ASSERT(0);
- }
- if (state == XFS_EXT_UNWRITTEN)
- bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
- if (whichfork == XFS_ATTR_FORK)
- bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
-}
-
-/* Log bmap updates in the intent item. */
-STATIC void
-xfs_bmap_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_bui_log_item *buip = intent;
- struct xfs_bmap_intent *bmap;
- uint next_extent;
- struct xfs_map_extent *map;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
- ASSERT(next_extent < buip->bui_format.bui_nextents);
- map = &buip->bui_format.bui_extents[next_extent];
- map->me_owner = bmap->bi_owner->i_ino;
- map->me_startblock = bmap->bi_bmap.br_startblock;
- map->me_startoff = bmap->bi_bmap.br_startoff;
- map->me_len = bmap->bi_bmap.br_blockcount;
- xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
- bmap->bi_bmap.br_state);
-}
-
-/* Get an BUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_bmap_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_bud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_bmap_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_bmap_intent *bmap;
- xfs_filblks_t count;
- int error;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
- count = bmap->bi_bmap.br_blockcount;
- error = xfs_trans_log_finish_bmap_update(tp, done_item,
- bmap->bi_type,
- bmap->bi_owner, bmap->bi_whichfork,
- bmap->bi_bmap.br_startoff,
- bmap->bi_bmap.br_startblock,
- &count,
- bmap->bi_bmap.br_state);
- if (!error && count > 0) {
- ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
- bmap->bi_bmap.br_blockcount = count;
- return -EAGAIN;
- }
- kmem_free(bmap);
- return error;
-}
-
-/* Abort all pending BUIs. */
-STATIC void
-xfs_bmap_update_abort_intent(
- void *intent)
-{
- xfs_bui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_bmap_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_bmap_intent *bmap;
-
- bmap = container_of(item, struct xfs_bmap_intent, bi_list);
- kmem_free(bmap);
-}
-
-const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
- .max_items = XFS_BUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_bmap_update_diff_items,
- .create_intent = xfs_bmap_update_create_intent,
- .abort_intent = xfs_bmap_update_abort_intent,
- .log_item = xfs_bmap_update_log_item,
- .create_done = xfs_bmap_update_create_done,
- .finish_item = xfs_bmap_update_finish_item,
- .cancel_item = xfs_bmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 7d65ebf1e847..b5b3a78ef31c 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -10,11 +10,9 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
-#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
-#include "xfs_error.h"
#include "xfs_trace.h"
/*
@@ -174,8 +172,7 @@ xfs_trans_get_buf_map(
xfs_buf_t *
xfs_trans_getsb(
xfs_trans_t *tp,
- struct xfs_mount *mp,
- int flags)
+ struct xfs_mount *mp)
{
xfs_buf_t *bp;
struct xfs_buf_log_item *bip;
@@ -185,7 +182,7 @@ xfs_trans_getsb(
* if tp is NULL.
*/
if (tp == NULL)
- return xfs_getsb(mp, flags);
+ return xfs_getsb(mp);
/*
* If the superblock buffer already has this transaction
@@ -203,7 +200,7 @@ xfs_trans_getsb(
return bp;
}
- bp = xfs_getsb(mp, flags);
+ bp = xfs_getsb(mp);
if (bp == NULL)
return NULL;
@@ -428,7 +425,7 @@ xfs_trans_brelse(
/*
* Mark the buffer as not needing to be unlocked when the buf item's
- * iop_unlock() routine is called. The buffer must already be locked
+ * iop_committing() routine is called. The buffer must already be locked
* and associated with the given transaction.
*/
/* ARGSUSED */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index cd664a03613f..1027c9ca6eb8 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -11,7 +11,6 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
-#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_quota.h"
@@ -29,7 +28,6 @@ xfs_trans_dqjoin(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- ASSERT(dqp->q_transp != tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(dqp->q_logitem.qli_dquot == dqp);
@@ -37,15 +35,8 @@ xfs_trans_dqjoin(
* Get a log_item_desc to point at the new item.
*/
xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
- /*
- * Initialize d_transp so we can later determine if this dquot is
- * associated with this transaction.
- */
- dqp->q_transp = tp;
}
-
/*
* This is called to mark the dquot as needing
* to be logged when the transaction is committed. The dquot must
@@ -61,7 +52,6 @@ xfs_trans_log_dquot(
xfs_trans_t *tp,
xfs_dquot_t *dqp)
{
- ASSERT(dqp->q_transp == tp);
ASSERT(XFS_DQ_IS_LOCKED(dqp));
tp->t_flags |= XFS_TRANS_DIRTY;
@@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas(
break;
ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(dqp->q_transp == tp);
/*
* adjust the actual number of blocks used
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
deleted file mode 100644
index 8ee7a3f8bb20..000000000000
--- a/fs/xfs/xfs_trans_extfree.c
+++ /dev/null
@@ -1,286 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_bit.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_extfree_item.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_trace.h"
-
-/*
- * This routine is called to allocate an "extent free done"
- * log item that will hold nextents worth of extents. The
- * caller must use all nextents extents, because we are not
- * flexible about this at all.
- */
-struct xfs_efd_log_item *
-xfs_trans_get_efd(struct xfs_trans *tp,
- struct xfs_efi_log_item *efip,
- uint nextents)
-{
- struct xfs_efd_log_item *efdp;
-
- ASSERT(tp != NULL);
- ASSERT(nextents > 0);
-
- efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
- ASSERT(efdp != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &efdp->efd_item);
- return efdp;
-}
-
-/*
- * Free an extent and log it to the EFD. Note that the transaction is marked
- * dirty regardless of whether the extent free succeeds or fails to support the
- * EFI/EFD lifecycle rules.
- */
-int
-xfs_trans_free_extent(
- struct xfs_trans *tp,
- struct xfs_efd_log_item *efdp,
- xfs_fsblock_t start_block,
- xfs_extlen_t ext_len,
- const struct xfs_owner_info *oinfo,
- bool skip_discard)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_extent *extp;
- uint next_extent;
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
- xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
- start_block);
- int error;
-
- trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
-
- error = __xfs_free_extent(tp, start_block, ext_len,
- oinfo, XFS_AG_RESV_NONE, skip_discard);
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the EFI and frees the EFD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = start_block;
- extp->ext_len = ext_len;
- efdp->efd_next_extent++;
-
- return error;
-}
-
-/* Sort bmap items by AG. */
-static int
-xfs_extent_free_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_extent_free_item *ra;
- struct xfs_extent_free_item *rb;
-
- ra = container_of(a, struct xfs_extent_free_item, xefi_list);
- rb = container_of(b, struct xfs_extent_free_item, xefi_list);
- return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
-}
-
-/* Get an EFI. */
-STATIC void *
-xfs_extent_free_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_efi_log_item *efip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- efip = xfs_efi_init(tp->t_mountp, count);
- ASSERT(efip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &efip->efi_item);
- return efip;
-}
-
-/* Log a free extent to the intent item. */
-STATIC void
-xfs_extent_free_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_efi_log_item *efip = intent;
- struct xfs_extent_free_item *free;
- uint next_extent;
- struct xfs_extent *extp;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
- ASSERT(next_extent < efip->efi_format.efi_nextents);
- extp = &efip->efi_format.efi_extents[next_extent];
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
-}
-
-/* Get an EFD so we can process all the free extents. */
-STATIC void *
-xfs_extent_free_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_efd(tp, intent, count);
-}
-
-/* Process a free extent. */
-STATIC int
-xfs_extent_free_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_extent_free_item *free;
- int error;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- error = xfs_trans_free_extent(tp, done_item,
- free->xefi_startblock,
- free->xefi_blockcount,
- &free->xefi_oinfo, free->xefi_skip_discard);
- kmem_free(free);
- return error;
-}
-
-/* Abort all pending EFIs. */
-STATIC void
-xfs_extent_free_abort_intent(
- void *intent)
-{
- xfs_efi_release(intent);
-}
-
-/* Cancel a free extent. */
-STATIC void
-xfs_extent_free_cancel_item(
- struct list_head *item)
-{
- struct xfs_extent_free_item *free;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- kmem_free(free);
-}
-
-const struct xfs_defer_op_type xfs_extent_free_defer_type = {
- .max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
- .create_intent = xfs_extent_free_create_intent,
- .abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
- .create_done = xfs_extent_free_create_done,
- .finish_item = xfs_extent_free_finish_item,
- .cancel_item = xfs_extent_free_cancel_item,
-};
-
-/*
- * AGFL blocks are accounted differently in the reserve pools and are not
- * inserted into the busy extent list.
- */
-STATIC int
-xfs_agfl_free_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efd_log_item *efdp = done_item;
- struct xfs_extent_free_item *free;
- struct xfs_extent *extp;
- struct xfs_buf *agbp;
- int error;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- uint next_extent;
-
- free = container_of(item, struct xfs_extent_free_item, xefi_list);
- ASSERT(free->xefi_blockcount == 1);
- agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
-
- trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
-
- error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
- if (!error)
- error = xfs_free_agfl_block(tp, agno, agbno, agbp,
- &free->xefi_oinfo);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the EFI and frees the EFD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
-
- next_extent = efdp->efd_next_extent;
- ASSERT(next_extent < efdp->efd_format.efd_nextents);
- extp = &(efdp->efd_format.efd_extents[next_extent]);
- extp->ext_start = free->xefi_startblock;
- extp->ext_len = free->xefi_blockcount;
- efdp->efd_next_extent++;
-
- kmem_free(free);
- return error;
-}
-
-
-/* sub-type with special handling for AGFL deferred frees */
-const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
- .max_items = XFS_EFI_MAX_FAST_EXTENTS,
- .diff_items = xfs_extent_free_diff_items,
- .create_intent = xfs_extent_free_create_intent,
- .abort_intent = xfs_extent_free_abort_intent,
- .log_item = xfs_extent_free_log_item,
- .create_done = xfs_extent_free_create_done,
- .finish_item = xfs_agfl_free_finish_item,
- .cancel_item = xfs_extent_free_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 542927321a61..93d14e47269d 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -8,13 +8,10 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
-#include "xfs_trace.h"
#include <linux/iversion.h>
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 091eae9f4e74..2e073c1c4614 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -16,12 +16,10 @@ struct xfs_log_vec;
void xfs_trans_init(struct xfs_mount *);
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
-void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
- bool abort);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
- xfs_lsn_t commit_lsn, int aborted);
+ xfs_lsn_t commit_lsn, bool aborted);
/*
* AIL traversal cursor.
*
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
deleted file mode 100644
index 8d734728dd1b..000000000000
--- a/fs/xfs/xfs_trans_refcount.c
+++ /dev/null
@@ -1,240 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_refcount_item.h"
-#include "xfs_alloc.h"
-#include "xfs_refcount.h"
-
-/*
- * This routine is called to allocate a "refcount update done"
- * log item.
- */
-struct xfs_cud_log_item *
-xfs_trans_get_cud(
- struct xfs_trans *tp,
- struct xfs_cui_log_item *cuip)
-{
- struct xfs_cud_log_item *cudp;
-
- cudp = xfs_cud_init(tp->t_mountp, cuip);
- xfs_trans_add_item(tp, &cudp->cud_item);
- return cudp;
-}
-
-/*
- * Finish an refcount update and log it to the CUD. Note that the
- * transaction is marked dirty regardless of whether the refcount
- * update succeeds or fails to support the CUI/CUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_refcount_update(
- struct xfs_trans *tp,
- struct xfs_cud_log_item *cudp,
- enum xfs_refcount_intent_type type,
- xfs_fsblock_t startblock,
- xfs_extlen_t blockcount,
- xfs_fsblock_t *new_fsb,
- xfs_extlen_t *new_len,
- struct xfs_btree_cur **pcur)
-{
- int error;
-
- error = xfs_refcount_finish_one(tp, type, startblock,
- blockcount, new_fsb, new_len, pcur);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the CUI and frees the CUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
-
- return error;
-}
-
-/* Sort refcount intents by AG. */
-static int
-xfs_refcount_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_refcount_intent *ra;
- struct xfs_refcount_intent *rb;
-
- ra = container_of(a, struct xfs_refcount_intent, ri_list);
- rb = container_of(b, struct xfs_refcount_intent, ri_list);
- return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
-}
-
-/* Get an CUI. */
-STATIC void *
-xfs_refcount_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_cui_log_item *cuip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- cuip = xfs_cui_init(tp->t_mountp, count);
- ASSERT(cuip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &cuip->cui_item);
- return cuip;
-}
-
-/* Set the phys extent flags for this reverse mapping. */
-static void
-xfs_trans_set_refcount_flags(
- struct xfs_phys_extent *refc,
- enum xfs_refcount_intent_type type)
-{
- refc->pe_flags = 0;
- switch (type) {
- case XFS_REFCOUNT_INCREASE:
- case XFS_REFCOUNT_DECREASE:
- case XFS_REFCOUNT_ALLOC_COW:
- case XFS_REFCOUNT_FREE_COW:
- refc->pe_flags |= type;
- break;
- default:
- ASSERT(0);
- }
-}
-
-/* Log refcount updates in the intent item. */
-STATIC void
-xfs_refcount_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_cui_log_item *cuip = intent;
- struct xfs_refcount_intent *refc;
- uint next_extent;
- struct xfs_phys_extent *ext;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
- ASSERT(next_extent < cuip->cui_format.cui_nextents);
- ext = &cuip->cui_format.cui_extents[next_extent];
- ext->pe_startblock = refc->ri_startblock;
- ext->pe_len = refc->ri_blockcount;
- xfs_trans_set_refcount_flags(ext, refc->ri_type);
-}
-
-/* Get an CUD so we can process all the deferred refcount updates. */
-STATIC void *
-xfs_refcount_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_cud(tp, intent);
-}
-
-/* Process a deferred refcount update. */
-STATIC int
-xfs_refcount_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_refcount_intent *refc;
- xfs_fsblock_t new_fsb;
- xfs_extlen_t new_aglen;
- int error;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
- error = xfs_trans_log_finish_refcount_update(tp, done_item,
- refc->ri_type,
- refc->ri_startblock,
- refc->ri_blockcount,
- &new_fsb, &new_aglen,
- (struct xfs_btree_cur **)state);
- /* Did we run out of reservation? Requeue what we didn't finish. */
- if (!error && new_aglen > 0) {
- ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
- refc->ri_type == XFS_REFCOUNT_DECREASE);
- refc->ri_startblock = new_fsb;
- refc->ri_blockcount = new_aglen;
- return -EAGAIN;
- }
- kmem_free(refc);
- return error;
-}
-
-/* Clean up after processing deferred refcounts. */
-STATIC void
-xfs_refcount_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_refcount_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending CUIs. */
-STATIC void
-xfs_refcount_update_abort_intent(
- void *intent)
-{
- xfs_cui_release(intent);
-}
-
-/* Cancel a deferred refcount update. */
-STATIC void
-xfs_refcount_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_refcount_intent *refc;
-
- refc = container_of(item, struct xfs_refcount_intent, ri_list);
- kmem_free(refc);
-}
-
-const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
- .max_items = XFS_CUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_refcount_update_diff_items,
- .create_intent = xfs_refcount_update_create_intent,
- .abort_intent = xfs_refcount_update_abort_intent,
- .log_item = xfs_refcount_update_log_item,
- .create_done = xfs_refcount_update_create_done,
- .finish_item = xfs_refcount_update_finish_item,
- .finish_cleanup = xfs_refcount_update_finish_cleanup,
- .cancel_item = xfs_refcount_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
deleted file mode 100644
index 5c7936b1be13..000000000000
--- a/fs/xfs/xfs_trans_rmap.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2016 Oracle. All Rights Reserved.
- * Author: Darrick J. Wong <darrick.wong@oracle.com>
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_defer.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_rmap_item.h"
-#include "xfs_alloc.h"
-#include "xfs_rmap.h"
-
-/* Set the map extent flags for this reverse mapping. */
-static void
-xfs_trans_set_rmap_flags(
- struct xfs_map_extent *rmap,
- enum xfs_rmap_intent_type type,
- int whichfork,
- xfs_exntst_t state)
-{
- rmap->me_flags = 0;
- if (state == XFS_EXT_UNWRITTEN)
- rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
- if (whichfork == XFS_ATTR_FORK)
- rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
- switch (type) {
- case XFS_RMAP_MAP:
- rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
- break;
- case XFS_RMAP_MAP_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
- break;
- case XFS_RMAP_UNMAP:
- rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
- break;
- case XFS_RMAP_UNMAP_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
- break;
- case XFS_RMAP_CONVERT:
- rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
- break;
- case XFS_RMAP_CONVERT_SHARED:
- rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
- break;
- case XFS_RMAP_ALLOC:
- rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
- break;
- case XFS_RMAP_FREE:
- rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
- break;
- default:
- ASSERT(0);
- }
-}
-
-struct xfs_rud_log_item *
-xfs_trans_get_rud(
- struct xfs_trans *tp,
- struct xfs_rui_log_item *ruip)
-{
- struct xfs_rud_log_item *rudp;
-
- rudp = xfs_rud_init(tp->t_mountp, ruip);
- xfs_trans_add_item(tp, &rudp->rud_item);
- return rudp;
-}
-
-/*
- * Finish an rmap update and log it to the RUD. Note that the transaction is
- * marked dirty regardless of whether the rmap update succeeds or fails to
- * support the RUI/RUD lifecycle rules.
- */
-int
-xfs_trans_log_finish_rmap_update(
- struct xfs_trans *tp,
- struct xfs_rud_log_item *rudp,
- enum xfs_rmap_intent_type type,
- uint64_t owner,
- int whichfork,
- xfs_fileoff_t startoff,
- xfs_fsblock_t startblock,
- xfs_filblks_t blockcount,
- xfs_exntst_t state,
- struct xfs_btree_cur **pcur)
-{
- int error;
-
- error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
- startblock, blockcount, state, pcur);
-
- /*
- * Mark the transaction dirty, even on error. This ensures the
- * transaction is aborted, which:
- *
- * 1.) releases the RUI and frees the RUD
- * 2.) shuts down the filesystem
- */
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
-
- return error;
-}
-
-/* Sort rmap intents by AG. */
-static int
-xfs_rmap_update_diff_items(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_mount *mp = priv;
- struct xfs_rmap_intent *ra;
- struct xfs_rmap_intent *rb;
-
- ra = container_of(a, struct xfs_rmap_intent, ri_list);
- rb = container_of(b, struct xfs_rmap_intent, ri_list);
- return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
- XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
-}
-
-/* Get an RUI. */
-STATIC void *
-xfs_rmap_update_create_intent(
- struct xfs_trans *tp,
- unsigned int count)
-{
- struct xfs_rui_log_item *ruip;
-
- ASSERT(tp != NULL);
- ASSERT(count > 0);
-
- ruip = xfs_rui_init(tp->t_mountp, count);
- ASSERT(ruip != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &ruip->rui_item);
- return ruip;
-}
-
-/* Log rmap updates in the intent item. */
-STATIC void
-xfs_rmap_update_log_item(
- struct xfs_trans *tp,
- void *intent,
- struct list_head *item)
-{
- struct xfs_rui_log_item *ruip = intent;
- struct xfs_rmap_intent *rmap;
- uint next_extent;
- struct xfs_map_extent *map;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
-
- /*
- * atomic_inc_return gives us the value after the increment;
- * we want to use it as an array index so we need to subtract 1 from
- * it.
- */
- next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
- ASSERT(next_extent < ruip->rui_format.rui_nextents);
- map = &ruip->rui_format.rui_extents[next_extent];
- map->me_owner = rmap->ri_owner;
- map->me_startblock = rmap->ri_bmap.br_startblock;
- map->me_startoff = rmap->ri_bmap.br_startoff;
- map->me_len = rmap->ri_bmap.br_blockcount;
- xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
- rmap->ri_bmap.br_state);
-}
-
-/* Get an RUD so we can process all the deferred rmap updates. */
-STATIC void *
-xfs_rmap_update_create_done(
- struct xfs_trans *tp,
- void *intent,
- unsigned int count)
-{
- return xfs_trans_get_rud(tp, intent);
-}
-
-/* Process a deferred rmap update. */
-STATIC int
-xfs_rmap_update_finish_item(
- struct xfs_trans *tp,
- struct list_head *item,
- void *done_item,
- void **state)
-{
- struct xfs_rmap_intent *rmap;
- int error;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
- error = xfs_trans_log_finish_rmap_update(tp, done_item,
- rmap->ri_type,
- rmap->ri_owner, rmap->ri_whichfork,
- rmap->ri_bmap.br_startoff,
- rmap->ri_bmap.br_startblock,
- rmap->ri_bmap.br_blockcount,
- rmap->ri_bmap.br_state,
- (struct xfs_btree_cur **)state);
- kmem_free(rmap);
- return error;
-}
-
-/* Clean up after processing deferred rmaps. */
-STATIC void
-xfs_rmap_update_finish_cleanup(
- struct xfs_trans *tp,
- void *state,
- int error)
-{
- struct xfs_btree_cur *rcur = state;
-
- xfs_rmap_finish_one_cleanup(tp, rcur, error);
-}
-
-/* Abort all pending RUIs. */
-STATIC void
-xfs_rmap_update_abort_intent(
- void *intent)
-{
- xfs_rui_release(intent);
-}
-
-/* Cancel a deferred rmap update. */
-STATIC void
-xfs_rmap_update_cancel_item(
- struct list_head *item)
-{
- struct xfs_rmap_intent *rmap;
-
- rmap = container_of(item, struct xfs_rmap_intent, ri_list);
- kmem_free(rmap);
-}
-
-const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
- .max_items = XFS_RUI_MAX_FAST_EXTENTS,
- .diff_items = xfs_rmap_update_diff_items,
- .create_intent = xfs_rmap_update_create_intent,
- .abort_intent = xfs_rmap_update_abort_intent,
- .log_item = xfs_rmap_update_log_item,
- .create_done = xfs_rmap_update_create_done,
- .finish_item = xfs_rmap_update_finish_item,
- .finish_cleanup = xfs_rmap_update_finish_cleanup,
- .cancel_item = xfs_rmap_update_cancel_item,
-};
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 9a63016009a1..3123b5aaad2a 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -5,15 +5,12 @@
*/
#include "xfs.h"
+#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>