diff options
Diffstat (limited to 'fs')
198 files changed, 6262 insertions, 5801 deletions
@@ -1479,8 +1479,9 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) return 0; } -static int aio_setup_rw(int rw, const struct iocb *iocb, struct iovec **iovec, - bool vectored, bool compat, struct iov_iter *iter) +static ssize_t aio_setup_rw(int rw, const struct iocb *iocb, + struct iovec **iovec, bool vectored, bool compat, + struct iov_iter *iter) { void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; size_t len = iocb->aio_nbytes; @@ -1537,7 +1538,7 @@ static int aio_read(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) @@ -1565,7 +1566,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, return -EINVAL; ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); - if (ret) + if (ret < 0) return ret; ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); if (!ret) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 56ae2f659b6d..cfeff1b8dce0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -187,7 +187,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(inode); struct btrfs_root *root = binode->root; struct btrfs_trans_handle *trans; - unsigned int fsflags; + unsigned int fsflags, old_fsflags; int ret; const char *comp = NULL; u32 binode_flags = binode->flags; @@ -212,13 +212,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) inode_lock(inode); fsflags = btrfs_mask_fsflags_for_type(inode, fsflags); - if ((fsflags ^ btrfs_inode_flags_to_fsflags(binode->flags)) & - (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; - goto out_unlock; - } - } + old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags); + ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags); + if (ret) + goto out_unlock; if (fsflags & FS_SYNC_FL) binode_flags |= BTRFS_INODE_SYNC; @@ -376,9 +373,7 @@ static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(file_inode(file)); struct fsxattr fa; - memset(&fa, 0, sizeof(fa)); - fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags); - + simple_fill_fsxattr(&fa, btrfs_inode_flags_to_xflags(binode->flags)); if (copy_to_user(arg, &fa, sizeof(fa))) return -EFAULT; @@ -391,7 +386,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) struct btrfs_inode *binode = BTRFS_I(inode); struct btrfs_root *root = binode->root; struct btrfs_trans_handle *trans; - struct fsxattr fa; + struct fsxattr fa, old_fa; unsigned old_flags; unsigned old_i_flags; int ret = 0; @@ -402,7 +397,6 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) if (btrfs_root_readonly(root)) return -EROFS; - memset(&fa, 0, sizeof(fa)); if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; @@ -422,13 +416,11 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg) old_flags = binode->flags; old_i_flags = inode->i_flags; - /* We need the capabilities to change append-only or immutable inode */ - if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) || - (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) && - !capable(CAP_LINUX_IMMUTABLE)) { - ret = -EPERM; + simple_fill_fsxattr(&old_fa, + btrfs_inode_flags_to_xflags(binode->flags)); + ret = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); + if (ret) goto out_unlock; - } if (fa.fsx_xflags & FS_XFLAG_SYNC) binode->flags |= BTRFS_INODE_SYNC; diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index 0e941d42e3e9..d6bbccb0ed15 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -737,7 +737,7 @@ void dlm_delete_debug_file(struct dlm_ls *ls) debugfs_remove(ls->ls_debug_toss_dentry); } -int dlm_create_debug_file(struct dlm_ls *ls) +void dlm_create_debug_file(struct dlm_ls *ls) { char name[DLM_LOCKSPACE_LEN + 8]; @@ -748,8 +748,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format1_fops); - if (!ls->ls_debug_rsb_dentry) - goto fail; /* format 2 */ @@ -761,8 +759,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format2_fops); - if (!ls->ls_debug_locks_dentry) - goto fail; /* format 3 */ @@ -774,8 +770,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format3_fops); - if (!ls->ls_debug_all_dentry) - goto fail; /* format 4 */ @@ -787,8 +781,6 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &format4_fops); - if (!ls->ls_debug_toss_dentry) - goto fail; memset(name, 0, sizeof(name)); snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name); @@ -798,21 +790,12 @@ int dlm_create_debug_file(struct dlm_ls *ls) dlm_root, ls, &waiters_fops); - if (!ls->ls_debug_waiters_dentry) - goto fail; - - return 0; - - fail: - dlm_delete_debug_file(ls); - return -ENOMEM; } -int __init dlm_register_debugfs(void) +void __init dlm_register_debugfs(void) { mutex_init(&debug_buf_lock); dlm_root = debugfs_create_dir("dlm", NULL); - return dlm_root ? 0 : -ENOMEM; } void dlm_unregister_debugfs(void) diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index da1173a0b274..416d9de35679 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -719,14 +719,14 @@ int dlm_plock_init(void); void dlm_plock_exit(void); #ifdef CONFIG_DLM_DEBUG -int dlm_register_debugfs(void); +void dlm_register_debugfs(void); void dlm_unregister_debugfs(void); -int dlm_create_debug_file(struct dlm_ls *ls); +void dlm_create_debug_file(struct dlm_ls *ls); void dlm_delete_debug_file(struct dlm_ls *ls); #else -static inline int dlm_register_debugfs(void) { return 0; } +static inline void dlm_register_debugfs(void) { } static inline void dlm_unregister_debugfs(void) { } -static inline int dlm_create_debug_file(struct dlm_ls *ls) { return 0; } +static inline void dlm_create_debug_file(struct dlm_ls *ls) { } static inline void dlm_delete_debug_file(struct dlm_ls *ls) { } #endif diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 114ebfe30929..3951d39b9b75 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1628,8 +1628,10 @@ static void clean_writequeues(void) static void work_stop(void) { - destroy_workqueue(recv_workqueue); - destroy_workqueue(send_workqueue); + if (recv_workqueue) + destroy_workqueue(recv_workqueue); + if (send_workqueue) + destroy_workqueue(send_workqueue); } static int work_start(void) @@ -1689,13 +1691,17 @@ static void work_flush(void) struct hlist_node *n; struct connection *con; - flush_workqueue(recv_workqueue); - flush_workqueue(send_workqueue); + if (recv_workqueue) + flush_workqueue(recv_workqueue); + if (send_workqueue) + flush_workqueue(send_workqueue); do { ok = 1; foreach_conn(stop_conn); - flush_workqueue(recv_workqueue); - flush_workqueue(send_workqueue); + if (recv_workqueue) + flush_workqueue(recv_workqueue); + if (send_workqueue) + flush_workqueue(send_workqueue); for (i = 0; i < CONN_HASH_SIZE && ok; i++) { hlist_for_each_entry_safe(con, n, &connection_hash[i], list) { diff --git a/fs/dlm/main.c b/fs/dlm/main.c index 39579927ed84..afc66a1346d3 100644 --- a/fs/dlm/main.c +++ b/fs/dlm/main.c @@ -35,9 +35,7 @@ static int __init init_dlm(void) if (error) goto out_lockspace; - error = dlm_register_debugfs(); - if (error) - goto out_config; + dlm_register_debugfs(); error = dlm_user_init(); if (error) @@ -61,7 +59,6 @@ static int __init init_dlm(void) dlm_user_exit(); out_debug: dlm_unregister_debugfs(); - out_config: dlm_config_exit(); out_lockspace: dlm_lockspace_exit(); diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index ee3bc0c96b9d..e9e27a271af0 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -107,16 +107,22 @@ out_free: return size; } -static int -efivarfs_ioc_getxflags(struct file *file, void __user *arg) +static inline unsigned int efivarfs_getflags(struct inode *inode) { - struct inode *inode = file->f_mapping->host; unsigned int i_flags; unsigned int flags = 0; i_flags = inode->i_flags; if (i_flags & S_IMMUTABLE) flags |= FS_IMMUTABLE_FL; + return flags; +} + +static int +efivarfs_ioc_getxflags(struct file *file, void __user *arg) +{ + struct inode *inode = file->f_mapping->host; + unsigned int flags = efivarfs_getflags(inode); if (copy_to_user(arg, &flags, sizeof(flags))) return -EFAULT; @@ -129,6 +135,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) struct inode *inode = file->f_mapping->host; unsigned int flags; unsigned int i_flags = 0; + unsigned int oldflags = efivarfs_getflags(inode); int error; if (!inode_owner_or_capable(inode)) @@ -140,9 +147,6 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) if (flags & ~FS_IMMUTABLE_FL) return -EOPNOTSUPP; - if (!capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - if (flags & FS_IMMUTABLE_FL) i_flags |= S_IMMUTABLE; @@ -152,12 +156,16 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg) return error; inode_lock(inode); + + error = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (error) + goto out; + inode_set_flags(inode, i_flags, S_IMMUTABLE); +out: inode_unlock(inode); - mnt_drop_write_file(file); - - return 0; + return error; } static long diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 0367c0039e68..1b853fb0b163 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -60,18 +60,10 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } oldflags = ei->i_flags; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); - ret = -EPERM; - goto setflags_out; - } + ret = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (ret) { + inode_unlock(inode); + goto setflags_out; } flags = flags & EXT2_FL_USER_MODIFIABLE; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 74648d42c69b..442f7ef873fc 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -312,16 +312,9 @@ static int ext4_ioctl_setflags(struct inode *inode, /* The JOURNAL_DATA flag is modifiable only by root */ jflag = flags & EXT4_JOURNAL_DATA_FL; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - * - * This test looks nicer. Thanks to Pauline Middelink - */ - if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto flags_out; - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto flags_out; /* * The JOURNAL_DATA flag can only be changed by @@ -741,28 +734,15 @@ group_add_out: return err; } -static int ext4_ioctl_check_project(struct inode *inode, struct fsxattr *fa) +static void ext4_fill_fsxattr(struct inode *inode, struct fsxattr *fa) { - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (__kprojid_val(EXT4_I(inode)->i_projid) != fa->fsx_projid) - return -EINVAL; + struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_test_inode_flag(inode, EXT4_INODE_PROJINHERIT)) { - if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT)) - return -EINVAL; - } else { - if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT) - return -EINVAL; - } + simple_fill_fsxattr(fa, ext4_iflags_to_xflags(ei->i_flags & + EXT4_FL_USER_VISIBLE)); - return 0; + if (ext4_has_feature_project(inode->i_sb)) + fa->fsx_projid = from_kprojid(&init_user_ns, ei->i_projid); } long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) @@ -1139,13 +1119,7 @@ resizefs_out: { struct fsxattr fa; - memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = ext4_iflags_to_xflags(ei->i_flags & EXT4_FL_USER_VISIBLE); - - if (ext4_has_feature_project(inode->i_sb)) { - fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, - EXT4_I(inode)->i_projid); - } + ext4_fill_fsxattr(inode, &fa); if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa))) @@ -1154,7 +1128,7 @@ resizefs_out: } case EXT4_IOC_FSSETXATTR: { - struct fsxattr fa; + struct fsxattr fa, old_fa; int err; if (copy_from_user(&fa, (struct fsxattr __user *)arg, @@ -1177,7 +1151,8 @@ resizefs_out: return err; inode_lock(inode); - err = ext4_ioctl_check_project(inode, &fa); + ext4_fill_fsxattr(inode, &old_fa); + err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa); if (err) goto out; flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) | diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ed70b68b2b38..a0eef95b9e0e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -146,8 +146,8 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, exist = f2fs_test_bit(offset, se->cur_valid_map); if (!exist && type == DATA_GENERIC_ENHANCE) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "blkaddr:%u, sit bitmap:%d", blkaddr, exist); + f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d", + blkaddr, exist); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); } @@ -184,8 +184,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, case DATA_GENERIC_ENHANCE_READ: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) { - f2fs_msg(sbi->sb, KERN_WARNING, - "access invalid blkaddr:%u", blkaddr); + f2fs_warn(sbi, "access invalid blkaddr:%u", + blkaddr); set_sbi_flag(sbi, SBI_NEED_FSCK); WARN_ON(1); return false; @@ -657,9 +657,8 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) err_out: set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); + f2fs_warn(sbi, "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); return err; } @@ -676,13 +675,12 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) return 0; if (bdev_read_only(sbi->sb->s_bdev)) { - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping orphan cleanup"); + f2fs_info(sbi, "write access unavailable, skipping orphan cleanup"); return 0; } if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs"); + f2fs_info(sbi, "orphan cleanup on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } @@ -827,26 +825,14 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, if (crc_offset < CP_MIN_CHKSUM_OFFSET || crc_offset > CP_CHKSUM_OFFSET) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid crc_offset: %zu", crc_offset); + f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - if (__is_set_ckpt_flags(*cp_block, CP_LARGE_NAT_BITMAP_FLAG)) { - if (crc_offset != CP_MIN_CHKSUM_OFFSET) { - f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, - "layout of large_nat_bitmap is deprecated, " - "run fsck to repair, chksum_offset: %zu", - crc_offset); - return -EINVAL; - } - } - crc = f2fs_checkpoint_chksum(sbi, *cp_block); if (crc != cur_cp_crc(*cp_block)) { f2fs_put_page(*cp_page, 1); - f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -869,9 +855,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (le32_to_cpu(cp_block->cp_pack_total_block_count) > sbi->blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_WARNING, - "invalid cp_pack_total_block_count:%u", - le32_to_cpu(cp_block->cp_pack_total_block_count)); + f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", + le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; @@ -905,6 +890,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned int cp_blks = 1 + __cp_payload(sbi); block_t cp_blk_no; int i; + int err; sbi->ckpt = f2fs_kzalloc(sbi, array_size(blk_size, cp_blks), GFP_KERNEL); @@ -932,6 +918,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) } else if (cp2) { cur_page = cp2; } else { + err = -EFSCORRUPTED; goto fail_no_cp; } @@ -944,8 +931,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) sbi->cur_cp_pack = 2; /* Sanity checking of checkpoint */ - if (f2fs_sanity_check_ckpt(sbi)) + if (f2fs_sanity_check_ckpt(sbi)) { + err = -EFSCORRUPTED; goto free_fail_no_cp; + } if (cp_blks <= 1) goto done; @@ -959,8 +948,10 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned char *ckpt = (unsigned char *)sbi->ckpt; cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); - if (IS_ERR(cur_page)) + if (IS_ERR(cur_page)) { + err = PTR_ERR(cur_page); goto free_fail_no_cp; + } sit_bitmap_ptr = page_address(cur_page); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); f2fs_put_page(cur_page, 1); @@ -975,7 +966,7 @@ free_fail_no_cp: f2fs_put_page(cp2, 1); fail_no_cp: kvfree(sbi->ckpt); - return -EINVAL; + return err; } static void __add_dirty_inode(struct inode *inode, enum inode_type type) @@ -1142,17 +1133,24 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi) static bool __need_flush_quota(struct f2fs_sb_info *sbi) { + bool ret = false; + if (!is_journalled_quota(sbi)) return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) - return false; - if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) - return true; - if (get_pages(sbi, F2FS_DIRTY_QDATA)) - return true; - return false; + + down_write(&sbi->quota_sem); + if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { + ret = false; + } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH)) { + clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + ret = true; + } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { + ret = true; + } + up_write(&sbi->quota_sem); + return ret; } /* @@ -1171,26 +1169,22 @@ static int block_operations(struct f2fs_sb_info *sbi) blk_start_plug(&plug); retry_flush_quotas: + f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { int locked; if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); - f2fs_lock_all(sbi); + set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); goto retry_flush_dents; } - clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + f2fs_unlock_all(sbi); /* only failed during mount/umount/freeze/quotactl */ locked = down_read_trylock(&sbi->sb->s_umount); f2fs_quota_sync(sbi->sb, -1); if (locked) up_read(&sbi->sb->s_umount); - } - - f2fs_lock_all(sbi); - if (__need_flush_quota(sbi)) { - f2fs_unlock_all(sbi); cond_resched(); goto retry_flush_quotas; } @@ -1212,12 +1206,6 @@ retry_flush_dents: */ down_write(&sbi->node_change); - if (__need_flush_quota(sbi)) { - up_write(&sbi->node_change); - f2fs_unlock_all(sbi); - goto retry_flush_quotas; - } - if (get_pages(sbi, F2FS_DIRTY_IMETA)) { up_write(&sbi->node_change); f2fs_unlock_all(sbi); @@ -1313,7 +1301,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) else __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || + is_sbi_flag_set(sbi, SBI_IS_RESIZEFS)) __set_ckpt_flags(ckpt, CP_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_CP_DISABLED)) @@ -1328,10 +1317,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); - /* - * TODO: we count on fsck.f2fs to clear this flag until we figure out - * missing cases which clear it incorrectly. - */ + else + __clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) __set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG); @@ -1571,8 +1558,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (cpc->reason != CP_PAUSE) return 0; - f2fs_msg(sbi->sb, KERN_WARNING, - "Start checkpoint disabled!"); + f2fs_warn(sbi, "Start checkpoint disabled!"); } mutex_lock(&sbi->cp_mutex); @@ -1638,8 +1624,7 @@ stop: stat_inc_cp_count(sbi->stat_info); if (cpc->reason & CP_RECOVERY) - f2fs_msg(sbi->sb, KERN_NOTICE, - "checkpoint: version = %llx", ckpt_ver); + f2fs_notice(sbi, "checkpoint: version = %llx", ckpt_ver); /* do checkpoint periodically */ f2fs_update_time(sbi, CP_TIME); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a546ac8685ea..0ca530afc684 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -14,6 +14,7 @@ #include <linux/pagevec.h> #include <linux/blkdev.h> #include <linux/bio.h> +#include <linux/swap.h> #include <linux/prefetch.h> #include <linux/uio.h> #include <linux/cleancache.h> @@ -54,7 +55,7 @@ static bool __is_cp_guaranteed(struct page *page) static enum count_type __read_io_type(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_file_mapping(page); if (mapping) { struct inode *inode = mapping->host; @@ -347,20 +348,20 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) io->bio = NULL; } -static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode, +static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; struct page *target; struct bvec_iter_all iter_all; - if (!io->bio) + if (!bio) return false; if (!inode && !page && !ino) return true; - bio_for_each_segment_all(bvec, io->bio, iter_all) { + bio_for_each_segment_all(bvec, bio, iter_all) { target = bvec->bv_page; if (fscrypt_is_bounce_page(target)) @@ -410,7 +411,7 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, struct f2fs_bio_info *io = sbi->write_io[btype] + temp; down_read(&io->io_rwsem); - ret = __has_merged_page(io, inode, page, ino); + ret = __has_merged_page(io->bio, inode, page, ino); up_read(&io->io_rwsem); } if (ret) @@ -454,7 +455,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) - return -EFAULT; + return -EFSCORRUPTED; trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); @@ -480,6 +481,61 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) return 0; } +int f2fs_merge_page_bio(struct f2fs_io_info *fio) +{ + struct bio *bio = *fio->bio; + struct page *page = fio->encrypted_page ? + fio->encrypted_page : fio->page; + + if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, + __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) + return -EFSCORRUPTED; + + trace_f2fs_submit_page_bio(page, fio); + f2fs_trace_ios(fio, 0); + + if (bio && (*fio->last_block + 1 != fio->new_blkaddr || + !__same_bdev(fio->sbi, fio->new_blkaddr, bio))) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + } +alloc_new: + if (!bio) { + bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc, + BIO_MAX_PAGES, false, fio->type, fio->temp); + bio_set_op_attrs(bio, fio->op, fio->op_flags); + } + + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + __submit_bio(fio->sbi, bio, fio->type); + bio = NULL; + goto alloc_new; + } + + if (fio->io_wbc) + wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + + inc_page_count(fio->sbi, WB_DATA_TYPE(page)); + + *fio->last_block = fio->new_blkaddr; + *fio->bio = bio; + + return 0; +} + +static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + if (!bio) + return; + + if (!__has_merged_page(*bio, NULL, page, 0)) + return; + + __submit_bio(sbi, *bio, DATA); + *bio = NULL; +} + void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -733,7 +789,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, dn.data_blkaddr = ei.blk + index - ei.fofs; if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } goto got_it; @@ -753,7 +809,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, !f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_err; } got_it: @@ -1099,7 +1155,7 @@ next_block: if (__is_valid_data_blkaddr(blkaddr) && !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto sync_out; } @@ -1529,7 +1585,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page, sector_t block_nr; int ret = 0; - block_in_file = (sector_t)page->index; + block_in_file = (sector_t)page_index(page); last_block = block_in_file + nr_pages; last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; @@ -1562,14 +1618,15 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; SetPageMappedToDisk(page); - if (!PageUptodate(page) && !cleancache_get_page(page)) { + if (!PageUptodate(page) && (!PageSwapCache(page) && + !cleancache_get_page(page))) { SetPageUptodate(page); goto confused; } if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { - ret = -EFAULT; + ret = -EFSCORRUPTED; goto out; } } else { @@ -1660,7 +1717,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, - page->index, + page_index(page), readahead_gfp_mask(mapping))) goto next_page; } @@ -1684,7 +1741,7 @@ next_page: static int f2fs_read_data_page(struct file *file, struct page *page) { - struct inode *inode = page->mapping->host; + struct inode *inode = page_file_mapping(page)->host; int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); @@ -1693,7 +1750,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false); + ret = f2fs_mpage_readpages(page_file_mapping(page), + NULL, page, 1, false); return ret; } @@ -1851,7 +1909,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio) if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; ipu_force = true; fio->need_lock = LOCK_DONE; @@ -1878,7 +1936,7 @@ got_it: if (__is_valid_data_blkaddr(fio->old_blkaddr) && !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, DATA_GENERIC_ENHANCE)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto out_writepage; } /* @@ -1946,6 +2004,8 @@ out: } static int __write_data_page(struct page *page, bool *submitted, + struct bio **bio, + sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type) { @@ -1971,6 +2031,8 @@ static int __write_data_page(struct page *page, bool *submitted, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, + .bio = bio, + .last_block = last_block, }; trace_f2fs_writepage(page, DATA); @@ -2069,10 +2131,13 @@ out: unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) + !F2FS_I(inode)->cp_task) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_balance_fs(sbi, need_balance_fs); + } if (unlikely(f2fs_cp_error(sbi))) { + f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); submitted = NULL; } @@ -2099,7 +2164,7 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, wbc, FS_DATA_IO); + return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); } /* @@ -2115,6 +2180,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int done = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct bio *bio = NULL; + sector_t last_block; int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2191,17 +2258,20 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) + if (wbc->sync_mode != WB_SYNC_NONE) { f2fs_wait_on_page_writeback(page, DATA, true, true); - else + f2fs_submit_ipu_bio(sbi, &bio, page); + } else { goto continue_unlock; + } } if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, wbc, io_type); + ret = __write_data_page(page, &submitted, &bio, + &last_block, wbc, io_type); if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to @@ -2250,6 +2320,9 @@ continue_unlock: if (nwritten) f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, NULL, 0, DATA); + /* submit cached bio of IPU write */ + if (bio) + __submit_bio(sbi, bio, DATA); return ret; } @@ -2261,6 +2334,9 @@ static inline bool __should_serialize_io(struct inode *inode, return false; if (IS_NOQUOTA(inode)) return false; + /* to avoid deadlock in path of data flush */ + if (F2FS_I(inode)->cp_task) + return false; if (wbc->sync_mode != WB_SYNC_ALL) return true; if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) @@ -2532,7 +2608,7 @@ repeat: } else { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto fail; } err = f2fs_submit_page_read(inode, page, blkaddr); @@ -2777,13 +2853,14 @@ int f2fs_release_page(struct page *page, gfp_t wait) static int f2fs_set_data_page_dirty(struct page *page) { - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; + struct inode *inode = page_file_mapping(page)->host; trace_f2fs_set_page_dirty(page, DATA); if (!PageUptodate(page)) SetPageUptodate(page); + if (PageSwapCache(page)) + return __set_page_dirty_nobuffers(page); if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { @@ -2875,6 +2952,126 @@ int f2fs_migrate_page(struct address_space *mapping, } #endif +#ifdef CONFIG_SWAP +/* Copied from generic_swapfile_activate() to check any holes */ +static int check_swap_activate(struct file *swap_file, unsigned int max) +{ + struct address_space *mapping = swap_file->f_mapping; + struct inode *inode = mapping->host; + unsigned blocks_per_page; + unsigned long page_no; + unsigned blkbits; + sector_t probe_block; + sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; + + blkbits = inode->i_blkbits; + blocks_per_page = PAGE_SIZE >> blkbits; + + /* + * Map all the blocks into the extent list. This code doesn't try + * to be very smart. + */ + probe_block = 0; + page_no = 0; + last_block = i_size_read(inode) >> blkbits; + while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + unsigned block_in_page; + sector_t first_block; + + cond_resched(); + + first_block = bmap(inode, probe_block); + if (first_block == 0) + goto bad_bmap; + + /* + * It must be PAGE_SIZE aligned on-disk + */ + if (first_block & (blocks_per_page - 1)) { + probe_block++; + goto reprobe; + } + + for (block_in_page = 1; block_in_page < blocks_per_page; + block_in_page++) { + sector_t block; + + block = bmap(inode, probe_block + block_in_page); + if (block == 0) + goto bad_bmap; + if (block != first_block + block_in_page) { + /* Discontiguity */ + probe_block++; + goto reprobe; + } + } + + first_block >>= (PAGE_SHIFT - blkbits); + if (page_no) { /* exclude the header page */ + if (first_block < lowest_block) + lowest_block = first_block; + if (first_block > highest_block) + highest_block = first_block; + } + + page_no++; + probe_block += blocks_per_page; +reprobe: + continue; + } + return 0; + +bad_bmap: + pr_err("swapon: swapfile has holes\n"); + return -EINVAL; +} + +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + struct inode *inode = file_inode(file); + int ret; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + return -EROFS; + + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + + ret = check_swap_activate(file, sis->max); + if (ret) + return ret; + + set_inode_flag(inode, FI_PIN_FILE); + f2fs_precache_extents(inode); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return 0; +} + +static void f2fs_swap_deactivate(struct file *file) +{ + struct inode *inode = file_inode(file); + + clear_inode_flag(inode, FI_PIN_FILE); +} +#else +static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, + sector_t *span) +{ + return -EOPNOTSUPP; +} + +static void f2fs_swap_deactivate(struct file *file) +{ +} +#endif + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -2887,6 +3084,8 @@ const struct address_space_operations f2fs_dblock_aops = { .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, + .swap_activate = f2fs_swap_activate, + .swap_deactivate = f2fs_swap_deactivate, #ifdef CONFIG_MIGRATION .migratepage = f2fs_migrate_page, #endif diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 99e9a5c37b71..7706049d23bf 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -27,8 +27,15 @@ static DEFINE_MUTEX(f2fs_stat_mutex); static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); int i; + /* these will be changed if online resize is done */ + si->main_area_segs = le32_to_cpu(raw_super->segment_count_main); + si->main_area_sections = le32_to_cpu(raw_super->section_count); + si->main_area_zones = si->main_area_sections / + le32_to_cpu(raw_super->secs_per_zone); + /* validation check of the segment numbers */ si->hit_largest = atomic64_read(&sbi->read_hit_largest); si->hit_cached = atomic64_read(&sbi->read_hit_cached); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 59bc46017855..85a1528f319f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -218,9 +218,8 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { - f2fs_msg(F2FS_I_SB(dir)->sb, KERN_WARNING, - "Corrupted max_depth of %lu: %u", - dir->i_ino, max_depth); + f2fs_warn(F2FS_I_SB(dir), "Corrupted max_depth of %lu: %u", + dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; f2fs_i_depth_write(dir, max_depth); } @@ -816,11 +815,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); if (unlikely(bit_pos > d->max || le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted namelen=%d, run fsck to fix.", - __func__, le16_to_cpu(de->name_len)); + f2fs_warn(sbi, "%s: corrupted namelen=%d, run fsck to fix.", + __func__, le16_to_cpu(de->name_len)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; goto out; } @@ -828,8 +826,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, int save_len = fstr->len; err = fscrypt_fname_disk_to_usr(d->inode, - (u32)de->hash_code, 0, - &de_name, fstr); + (u32)le32_to_cpu(de->hash_code), + 0, &de_name, fstr); if (err) goto out; diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index caf77fe8ac07..e60078460ad1 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -184,10 +184,9 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi, next_re = rb_entry(next, struct rb_entry, rb_node); if (cur_re->ofs + cur_re->len > next_re->ofs) { - f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " - "cur(%u, %u) next(%u, %u)", - cur_re->ofs, cur_re->len, - next_re->ofs, next_re->len); + f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); return false; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 06b89a9862ab..17382da7f0bd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -136,6 +136,9 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ + block_t unusable_cap; /* Amount of space allowed to be + * unusable when disabling checkpoint + */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -412,6 +415,7 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, __u32) #define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, __u32) #define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15) +#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, __u64) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -476,8 +480,8 @@ static inline int get_inline_xattr_addrs(struct inode *inode); #define NR_INLINE_DENTRY(inode) (MAX_INLINE_DATA(inode) * BITS_PER_BYTE / \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ BITS_PER_BYTE + 1)) -#define INLINE_DENTRY_BITMAP_SIZE(inode) ((NR_INLINE_DENTRY(inode) + \ - BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_DENTRY_BITMAP_SIZE(inode) \ + DIV_ROUND_UP(NR_INLINE_DENTRY(inode), BITS_PER_BYTE) #define INLINE_RESERVED_SIZE(inode) (MAX_INLINE_DATA(inode) - \ ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ NR_INLINE_DENTRY(inode) + \ @@ -1052,6 +1056,8 @@ struct f2fs_io_info { bool retry; /* need to reallocate block address */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ + struct bio **bio; /* bio for ipu */ + sector_t *last_block; /* last block number in bio */ unsigned char version; /* version of the node */ }; @@ -1111,6 +1117,7 @@ enum { SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */ SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ + SBI_IS_RESIZEFS, /* resizefs is in process */ }; enum { @@ -1207,6 +1214,7 @@ struct f2fs_sb_info { /* for inode management */ struct list_head inode_list[NR_INODE_TYPE]; /* dirty inode list */ spinlock_t inode_lock[NR_INODE_TYPE]; /* for dirty inode list lock */ + struct mutex flush_lock; /* for flush exclusion */ /* for extent tree cache */ struct radix_tree_root extent_tree_root;/* cache extent cache entries */ @@ -1230,6 +1238,7 @@ struct f2fs_sb_info { unsigned int segs_per_sec; /* segments per section */ unsigned int secs_per_zone; /* sections per zone */ unsigned int total_sections; /* total section count */ + struct mutex resize_mutex; /* for resize exclusion */ unsigned int total_node_count; /* total node block count */ unsigned int total_valid_node_count; /* valid node block count */ loff_t max_file_blocks; /* max block index of file */ @@ -1247,6 +1256,7 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ + struct rw_semaphore quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1488,7 +1498,7 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page->mapping); + return F2FS_M_SB(page_file_mapping(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -1766,8 +1776,12 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; - if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - avail_user_block_count -= sbi->unusable_block_count; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (avail_user_block_count > sbi->unusable_block_count) + avail_user_block_count -= sbi->unusable_block_count; + else + avail_user_block_count = 0; + } if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) @@ -1795,7 +1809,20 @@ enospc: return -ENOSPC; } -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); +__printf(2, 3) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...); + +#define f2fs_err(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_ERR fmt, ##__VA_ARGS__) +#define f2fs_warn(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_WARNING fmt, ##__VA_ARGS__) +#define f2fs_notice(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_NOTICE fmt, ##__VA_ARGS__) +#define f2fs_info(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_INFO fmt, ##__VA_ARGS__) +#define f2fs_debug(sbi, fmt, ...) \ + f2fs_printk(sbi, KERN_DEBUG fmt, ##__VA_ARGS__) + static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, block_t count) @@ -1811,11 +1838,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, sbi->current_reserved_blocks + count); spin_unlock(&sbi->stat_lock); if (unlikely(inode->i_blocks < sectors)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks, - (unsigned long long)sectors); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu, sectors:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks, + (unsigned long long)sectors); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -1967,7 +1993,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, struct inode *inode, bool is_inode) { block_t valid_block_count; - unsigned int valid_node_count; + unsigned int valid_node_count, user_block_count; int err; if (is_inode) { @@ -1994,10 +2020,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) - valid_block_count += sbi->unusable_block_count; + user_block_count -= sbi->unusable_block_count; - if (unlikely(valid_block_count > sbi->user_block_count)) { + if (unlikely(valid_block_count > user_block_count)) { spin_unlock(&sbi->stat_lock); goto enospc; } @@ -2052,10 +2079,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, dquot_free_inode(inode); } else { if (unlikely(inode->i_blocks == 0)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(sbi, "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, + (unsigned long long)inode->i_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); return; } @@ -2191,6 +2217,9 @@ static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { + if (sbi->gc_mode == GC_URGENT) + return true; + if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) || get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) || get_pages(sbi, F2FS_WB_CP_DATA) || @@ -2198,7 +2227,7 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) get_pages(sbi, F2FS_DIO_WRITE)) return false; - if (SM_I(sbi) && SM_I(sbi)->dcc_info && + if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info && atomic_read(&SM_I(sbi)->dcc_info->queued_discard)) return false; @@ -2320,57 +2349,23 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) } /* - * Inode flags + * On-disk inode flags (f2fs_inode::i_flags) */ -#define F2FS_SECRM_FL 0x00000001 /* Secure deletion */ -#define F2FS_UNRM_FL 0x00000002 /* Undelete */ -#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ -/* Reserved for compression usage... */ -#define F2FS_DIRTY_FL 0x00000100 -#define F2FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define F2FS_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define F2FS_ENCRYPT_FL 0x00000800 /* encrypted file */ -/* End compression flags --- maybe not all used */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define F2FS_IMAGIC_FL 0x00002000 /* AFS directory */ -#define F2FS_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define F2FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define F2FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define F2FS_HUGE_FILE_FL 0x00040000 /* Set to each huge file */ -#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */ -#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */ -#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ -#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ - -#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */ -#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */ - -/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */ -#define F2FS_FL_XFLAG_VISIBLE (F2FS_SYNC_FL | \ - F2FS_IMMUTABLE_FL | \ - F2FS_APPEND_FL | \ - F2FS_NODUMP_FL | \ - F2FS_NOATIME_FL | \ - F2FS_PROJINHERIT_FL) /* Flags that should be inherited by new inodes from their parent. */ -#define F2FS_FL_INHERITED (F2FS_SECRM_FL | F2FS_UNRM_FL | F2FS_COMPR_FL |\ - F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL |\ - F2FS_NOCOMPR_FL | F2FS_JOURNAL_DATA_FL |\ - F2FS_NOTAIL_FL | F2FS_DIRSYNC_FL |\ - F2FS_PROJINHERIT_FL) +#define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ + F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ -#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_TOPDIR_FL)) +#define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL)) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) @@ -2856,9 +2851,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type)) { - f2fs_msg(sbi->sb, KERN_ERR, - "invalid blkaddr: %u, type: %d, run fsck to fix.", - blkaddr, type); + f2fs_err(sbi, "invalid blkaddr: %u, type: %d, run fsck to fix.", + blkaddr, type); f2fs_bug_on(sbi, 1); } } @@ -2989,8 +2983,6 @@ int f2fs_quota_sync(struct super_block *sb, int type); void f2fs_quota_off_umount(struct super_block *sb); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); int f2fs_sync_fs(struct super_block *sb, int sync); -extern __printf(3, 4) -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...); int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); /* @@ -3074,9 +3066,12 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi); void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi); -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi); +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi); +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable); void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3169,6 +3164,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, nid_t ino, enum page_type type); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); +int f2fs_merge_page_bio(struct f2fs_io_info *fio); void f2fs_submit_page_write(struct f2fs_io_info *fio); struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi, block_t blk_addr, struct bio *bio); @@ -3214,6 +3210,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, unsigned int segno); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count); /* * recovery.c @@ -3686,7 +3683,8 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, if (test_opt(sbi, LFS) && (rw == WRITE) && block_unaligned_IO(inode, iocb, iter)) return true; - if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED)) + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) && + !(inode->i_flags & S_SWAPFILE)) return true; return false; @@ -3712,4 +3710,7 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi) return false; } +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_F2FS_H */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 45b45f37d347..f8d46df8fa9e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -707,11 +707,9 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->btime.tv_nsec = fi->i_crtime.tv_nsec; } - flags = fi->i_flags & F2FS_FL_USER_VISIBLE; + flags = fi->i_flags; if (flags & F2FS_APPEND_FL) stat->attributes |= STATX_ATTR_APPEND; - if (flags & F2FS_COMPR_FL) - stat->attributes |= STATX_ATTR_COMPRESSED; if (IS_ENCRYPTED(inode)) stat->attributes |= STATX_ATTR_ENCRYPTED; if (flags & F2FS_IMMUTABLE_FL) @@ -720,7 +718,6 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= (STATX_ATTR_APPEND | - STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP); @@ -1026,7 +1023,7 @@ next_dnode: !f2fs_is_valid_blkaddr(sbi, *blkaddr, DATA_GENERIC_ENHANCE)) { f2fs_put_dnode(&dn); - return -EFAULT; + return -EFSCORRUPTED; } if (!f2fs_is_checkpointed_data(sbi, *blkaddr)) { @@ -1214,7 +1211,7 @@ roll_back: static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + pgoff_t nrpages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); pgoff_t start = offset >> PAGE_SHIFT; pgoff_t end = (offset + len) >> PAGE_SHIFT; int ret; @@ -1467,7 +1464,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -1531,7 +1528,12 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + if (f2fs_is_pinned_file(inode)) + map.m_seg_type = CURSEG_COLD_DATA; + + err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? + F2FS_GET_BLOCK_PRE_DIO : + F2FS_GET_BLOCK_PRE_AIO)); if (err) { pgoff_t last_off; @@ -1648,44 +1650,22 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) return 0; } -static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) +static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { - struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags; - - if (IS_ENCRYPTED(inode)) - flags |= F2FS_ENCRYPT_FL; - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - flags |= F2FS_INLINE_DATA_FL; - if (is_inode_flag_set(inode, FI_PIN_FILE)) - flags |= F2FS_NOCOW_FL; - - flags &= F2FS_FL_USER_VISIBLE; - - return put_user(flags, (int __user *)arg); -} - -static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) -{ - struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int oldflags; + u32 oldflags; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - flags = f2fs_mask_flags(inode->i_mode, flags); - oldflags = fi->i_flags; - if ((flags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) + if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL)) if (!capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - flags = flags & F2FS_FL_USER_MODIFIABLE; - flags |= oldflags & ~F2FS_FL_USER_MODIFIABLE; - fi->i_flags = flags; + fi->i_flags = iflags | (oldflags & ~mask); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1698,26 +1678,124 @@ static int __f2fs_ioc_setflags(struct inode *inode, unsigned int flags) return 0; } +/* FS_IOC_GETFLAGS and FS_IOC_SETFLAGS support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_GETFLAGS, add an entry + * for it to f2fs_fsflags_map[], and add its FS_*_FL equivalent to + * F2FS_GETTABLE_FS_FL. To also make it settable via FS_IOC_SETFLAGS, also add + * its FS_*_FL equivalent to F2FS_SETTABLE_FS_FL. + */ + +static const struct { + u32 iflag; + u32 fsflag; +} f2fs_fsflags_map[] = { + { F2FS_SYNC_FL, FS_SYNC_FL }, + { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, + { F2FS_APPEND_FL, FS_APPEND_FL }, + { F2FS_NODUMP_FL, FS_NODUMP_FL }, + { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_INDEX_FL, FS_INDEX_FL }, + { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, + { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, +}; + +#define F2FS_GETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_INDEX_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL | \ + FS_ENCRYPT_FL | \ + FS_INLINE_DATA_FL | \ + FS_NOCOW_FL) + +#define F2FS_SETTABLE_FS_FL ( \ + FS_SYNC_FL | \ + FS_IMMUTABLE_FL | \ + FS_APPEND_FL | \ + FS_NODUMP_FL | \ + FS_NOATIME_FL | \ + FS_DIRSYNC_FL | \ + FS_PROJINHERIT_FL) + +/* Convert f2fs on-disk i_flags to FS_IOC_{GET,SET}FLAGS flags */ +static inline u32 f2fs_iflags_to_fsflags(u32 iflags) +{ + u32 fsflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (iflags & f2fs_fsflags_map[i].iflag) + fsflags |= f2fs_fsflags_map[i].fsflag; + + return fsflags; +} + +/* Convert FS_IOC_{GET,SET}FLAGS flags to f2fs on-disk i_flags */ +static inline u32 f2fs_fsflags_to_iflags(u32 fsflags) +{ + u32 iflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_fsflags_map); i++) + if (fsflags & f2fs_fsflags_map[i].fsflag) + iflags |= f2fs_fsflags_map[i].iflag; + + return iflags; +} + +static int f2fs_ioc_getflags(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_inode_info *fi = F2FS_I(inode); + u32 fsflags = f2fs_iflags_to_fsflags(fi->i_flags); + + if (IS_ENCRYPTED(inode)) + fsflags |= FS_ENCRYPT_FL; + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) + fsflags |= FS_INLINE_DATA_FL; + if (is_inode_flag_set(inode, FI_PIN_FILE)) + fsflags |= FS_NOCOW_FL; + + fsflags &= F2FS_GETTABLE_FS_FL; + + return put_user(fsflags, (int __user *)arg); +} + static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - unsigned int flags; + u32 fsflags; + u32 iflags; int ret; if (!inode_owner_or_capable(inode)) return -EACCES; - if (get_user(flags, (int __user *)arg)) + if (get_user(fsflags, (int __user *)arg)) return -EFAULT; + if (fsflags & ~F2FS_GETTABLE_FS_FL) + return -EOPNOTSUPP; + fsflags &= F2FS_SETTABLE_FS_FL; + + iflags = f2fs_fsflags_to_iflags(fsflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; inode_lock(inode); - ret = __f2fs_ioc_setflags(inode, flags); - + ret = f2fs_setflags_common(inode, iflags, + f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); inode_unlock(inode); mnt_drop_write_file(filp); return ret; @@ -1764,9 +1842,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) * f2fs_is_atomic_file. */ if (get_dirty_pages(inode)) - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Unexpected flush for atomic writes: ino=%lu, npages=%u", - inode->i_ino, get_dirty_pages(inode)); + f2fs_warn(F2FS_I_SB(inode), "Unexpected flush for atomic writes: ino=%lu, npages=%u", + inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2201,8 +2278,7 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg) return -EROFS; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { - f2fs_msg(sbi->sb, KERN_INFO, - "Skipping Checkpoint. Checkpoints currently disabled."); + f2fs_info(sbi, "Skipping Checkpoint. Checkpoints currently disabled."); return -EINVAL; } @@ -2291,7 +2367,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, if (!fragmented) goto out; - sec_num = (total + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); + sec_num = DIV_ROUND_UP(total, BLKS_PER_SEC(sbi)); /* * make sure there are enough free section for LFS allocation, this can @@ -2587,10 +2663,8 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) if (!f2fs_is_multi_device(sbi) || sbi->s_ndevs - 1 <= range.dev_num || __is_large_section(sbi)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Can't flush %u in %d for segs_per_sec %u != 1", - range.dev_num, sbi->s_ndevs, - sbi->segs_per_sec); + f2fs_warn(sbi, "Can't flush %u in %d for segs_per_sec %u != 1", + range.dev_num, sbi->s_ndevs, sbi->segs_per_sec); return -EINVAL; } @@ -2727,47 +2801,56 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) } #endif -/* Transfer internal flags to xflags */ -static inline __u32 f2fs_iflags_to_xflags(unsigned long iflags) -{ - __u32 xflags = 0; - - if (iflags & F2FS_SYNC_FL) - xflags |= FS_XFLAG_SYNC; - if (iflags & F2FS_IMMUTABLE_FL) - xflags |= FS_XFLAG_IMMUTABLE; - if (iflags & F2FS_APPEND_FL) - xflags |= FS_XFLAG_APPEND; - if (iflags & F2FS_NODUMP_FL) - xflags |= FS_XFLAG_NODUMP; - if (iflags & F2FS_NOATIME_FL) - xflags |= FS_XFLAG_NOATIME; - if (iflags & F2FS_PROJINHERIT_FL) - xflags |= FS_XFLAG_PROJINHERIT; +/* FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR support */ + +/* + * To make a new on-disk f2fs i_flag gettable via FS_IOC_FSGETXATTR and settable + * via FS_IOC_FSSETXATTR, add an entry for it to f2fs_xflags_map[], and add its + * FS_XFLAG_* equivalent to F2FS_SUPPORTED_XFLAGS. + */ + +static const struct { + u32 iflag; + u32 xflag; +} f2fs_xflags_map[] = { + { F2FS_SYNC_FL, FS_XFLAG_SYNC }, + { F2FS_IMMUTABLE_FL, FS_XFLAG_IMMUTABLE }, + { F2FS_APPEND_FL, FS_XFLAG_APPEND }, + { F2FS_NODUMP_FL, FS_XFLAG_NODUMP }, + { F2FS_NOATIME_FL, FS_XFLAG_NOATIME }, + { F2FS_PROJINHERIT_FL, FS_XFLAG_PROJINHERIT }, +}; + +#define F2FS_SUPPORTED_XFLAGS ( \ + FS_XFLAG_SYNC | \ + FS_XFLAG_IMMUTABLE | \ + FS_XFLAG_APPEND | \ + FS_XFLAG_NODUMP | \ + FS_XFLAG_NOATIME | \ + FS_XFLAG_PROJINHERIT) + +/* Convert f2fs on-disk i_flags to FS_IOC_FS{GET,SET}XATTR flags */ +static inline u32 f2fs_iflags_to_xflags(u32 iflags) +{ + u32 xflags = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (iflags & f2fs_xflags_map[i].iflag) + xflags |= f2fs_xflags_map[i].xflag; + return xflags; } -#define F2FS_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ - FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ - FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) - -/* Transfer xflags flags to internal */ -static inline unsigned long f2fs_xflags_to_iflags(__u32 xflags) +/* Convert FS_IOC_FS{GET,SET}XATTR flags to f2fs on-disk i_flags */ +static inline u32 f2fs_xflags_to_iflags(u32 xflags) { - unsigned long iflags = 0; + u32 iflags = 0; + int i; - if (xflags & FS_XFLAG_SYNC) - iflags |= F2FS_SYNC_FL; - if (xflags & FS_XFLAG_IMMUTABLE) - iflags |= F2FS_IMMUTABLE_FL; - if (xflags & FS_XFLAG_APPEND) - iflags |= F2FS_APPEND_FL; - if (xflags & FS_XFLAG_NODUMP) - iflags |= F2FS_NODUMP_FL; - if (xflags & FS_XFLAG_NOATIME) - iflags |= F2FS_NOATIME_FL; - if (xflags & FS_XFLAG_PROJINHERIT) - iflags |= F2FS_PROJINHERIT_FL; + for (i = 0; i < ARRAY_SIZE(f2fs_xflags_map); i++) + if (xflags & f2fs_xflags_map[i].xflag) + iflags |= f2fs_xflags_map[i].iflag; return iflags; } @@ -2779,8 +2862,7 @@ static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) struct fsxattr fa; memset(&fa, 0, sizeof(struct fsxattr)); - fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags & - F2FS_FL_USER_VISIBLE); + fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags); if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, @@ -2818,9 +2900,8 @@ static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); - struct f2fs_inode_info *fi = F2FS_I(inode); struct fsxattr fa; - unsigned int flags; + u32 iflags; int err; if (copy_from_user(&fa, (struct fsxattr __user *)arg, sizeof(fa))) @@ -2830,11 +2911,11 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) if (!inode_owner_or_capable(inode)) return -EACCES; - if (fa.fsx_xflags & ~F2FS_SUPPORTED_FS_XFLAGS) + if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS) return -EOPNOTSUPP; - flags = f2fs_xflags_to_iflags(fa.fsx_xflags); - if (f2fs_mask_flags(inode->i_mode, flags) != flags) + iflags = f2fs_xflags_to_iflags(fa.fsx_xflags); + if (f2fs_mask_flags(inode->i_mode, iflags) != iflags) return -EOPNOTSUPP; err = mnt_want_write_file(filp); @@ -2845,9 +2926,8 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) err = f2fs_ioctl_check_project(inode, &fa); if (err) goto out; - flags = (fi->i_flags & ~F2FS_FL_XFLAG_VISIBLE) | - (flags & F2FS_FL_XFLAG_VISIBLE); - err = __f2fs_ioc_setflags(inode, flags); + err = f2fs_setflags_common(inode, iflags, + f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); if (err) goto out; @@ -2869,10 +2949,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) fi->i_gc_failures[GC_FAILURE_PIN] + 1); if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: Enable GC = ino %lx after %x GC trials", - __func__, inode->i_ino, - fi->i_gc_failures[GC_FAILURE_PIN]); + f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", + __func__, inode->i_ino, + fi->i_gc_failures[GC_FAILURE_PIN]); clear_inode_flag(inode, FI_PIN_FILE); return -EAGAIN; } @@ -2885,9 +2964,6 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) __u32 pin; int ret = 0; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (get_user(pin, (__u32 __user *)arg)) return -EFAULT; @@ -2980,6 +3056,27 @@ static int f2fs_ioc_precache_extents(struct file *filp, unsigned long arg) return f2fs_precache_extents(file_inode(filp)); } +static int f2fs_ioc_resize_fs(struct file *filp, unsigned long arg) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp)); + __u64 block_count; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&block_count, (void __user *)arg, + sizeof(block_count))) + return -EFAULT; + + ret = f2fs_resize_fs(sbi, block_count); + + return ret; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp))))) @@ -3036,6 +3133,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_set_pin_file(filp, arg); case F2FS_IOC_PRECACHE_EXTENTS: return f2fs_ioc_precache_extents(filp, arg); + case F2FS_IOC_RESIZE_FS: + return f2fs_ioc_resize_fs(filp, arg); default: return -ENOTTY; } @@ -3149,6 +3248,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GET_PIN_FILE: case F2FS_IOC_SET_PIN_FILE: case F2FS_IOC_PRECACHE_EXTENTS: + case F2FS_IOC_RESIZE_FS: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 963fb4571fd9..6691f526fa40 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -311,10 +311,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; - unsigned int last_segment = MAIN_SEGS(sbi); + unsigned int last_segment; unsigned int nsearched = 0; mutex_lock(&dirty_i->seglist_lock); + last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); @@ -387,7 +388,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, goto next; /* Don't touch checkpointed data */ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && - get_ckpt_valid_blocks(sbi, segno))) + get_ckpt_valid_blocks(sbi, segno) && + p.alloc_mode != SSR)) goto next; if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap)) goto next; @@ -404,7 +406,8 @@ next: sm->last_victim[p.gc_mode] = last_victim + 1; else sm->last_victim[p.gc_mode] = segno + 1; - sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] %= + (MAIN_SECS(sbi) * sbi->segs_per_sec); break; } } @@ -615,9 +618,8 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (sum->version != dni->version) { - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: valid data with mismatched node version.", - __func__); + f2fs_warn(sbi, "%s: valid data with mismatched node version.", + __func__); set_sbi_flag(sbi, SBI_NEED_FSCK); } @@ -658,7 +660,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) dn.data_blkaddr = ei.blk + index - ei.fofs; if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } goto got_it; @@ -676,7 +678,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { - err = -EFAULT; + err = -EFSCORRUPTED; goto put_page; } got_it: @@ -1180,9 +1182,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, sum = page_address(sum_page); if (type != GET_SUM_TYPE((&sum->footer))) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent segment (%u) " - "type [%d, %d] in SSA and SIT", - segno, type, GET_SUM_TYPE((&sum->footer))); + f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", + segno, type, GET_SUM_TYPE((&sum->footer))); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_stop_checkpoint(sbi, false); goto skip; @@ -1360,3 +1361,176 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) SIT_I(sbi)->last_victim[ALLOC_NEXT] = GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } + +static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, + unsigned int end) +{ + int type; + unsigned int segno, next_inuse; + int err = 0; + + /* Move out cursegs from the target range */ + for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++) + allocate_segment_for_resize(sbi, type, start, end); + + /* do GC to move out valid blocks in the range */ + for (segno = start; segno <= end; segno += sbi->segs_per_sec) { + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), + }; + + mutex_lock(&sbi->gc_mutex); + do_garbage_collect(sbi, segno, &gc_list, FG_GC); + mutex_unlock(&sbi->gc_mutex); + put_gc_inode(&gc_list); + + if (get_valid_blocks(sbi, segno, true)) + return -EAGAIN; + } + + err = f2fs_sync_fs(sbi->sb, 1); + if (err) + return err; + + next_inuse = find_next_inuse(FREE_I(sbi), end + 1, start); + if (next_inuse <= end) { + f2fs_err(sbi, "segno %u should be free but still inuse!", + next_inuse); + f2fs_bug_on(sbi, 1); + } + return err; +} + +static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) +{ + struct f2fs_super_block *raw_sb = F2FS_RAW_SUPER(sbi); + int section_count = le32_to_cpu(raw_sb->section_count); + int segment_count = le32_to_cpu(raw_sb->segment_count); + int segment_count_main = le32_to_cpu(raw_sb->segment_count_main); + long long block_count = le64_to_cpu(raw_sb->block_count); + int segs = secs * sbi->segs_per_sec; + + raw_sb->section_count = cpu_to_le32(section_count + secs); + raw_sb->segment_count = cpu_to_le32(segment_count + segs); + raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); + raw_sb->block_count = cpu_to_le64(block_count + + (long long)segs * sbi->blocks_per_seg); +} + +static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) +{ + int segs = secs * sbi->segs_per_sec; + long long user_block_count = + le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); + + SM_I(sbi)->segment_count = (int)SM_I(sbi)->segment_count + segs; + MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; + FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; + FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + + (long long)segs * sbi->blocks_per_seg); +} + +int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) +{ + __u64 old_block_count, shrunk_blocks; + unsigned int secs; + int gc_mode, gc_type; + int err = 0; + __u32 rem; + + old_block_count = le64_to_cpu(F2FS_RAW_SUPER(sbi)->block_count); + if (block_count > old_block_count) + return -EINVAL; + + /* new fs size should align to section size */ + div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); + if (rem) + return -EINVAL; + + if (block_count == old_block_count) + return 0; + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_err(sbi, "Should run fsck to repair first."); + return -EFSCORRUPTED; + } + + if (test_opt(sbi, DISABLE_CHECKPOINT)) { + f2fs_err(sbi, "Checkpoint should be enabled."); + return -EINVAL; + } + + freeze_bdev(sbi->sb->s_bdev); + + shrunk_blocks = old_block_count - block_count; + secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); + spin_lock(&sbi->stat_lock); + if (shrunk_blocks + valid_user_blocks(sbi) + + sbi->current_reserved_blocks + sbi->unusable_block_count + + F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count) + err = -ENOSPC; + else + sbi->user_block_count -= shrunk_blocks; + spin_unlock(&sbi->stat_lock); + if (err) { + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; + } + + mutex_lock(&sbi->resize_mutex); + set_sbi_flag(sbi, SBI_IS_RESIZEFS); + + mutex_lock(&DIRTY_I(sbi)->seglist_lock); + + MAIN_SECS(sbi) -= secs; + + for (gc_mode = 0; gc_mode < MAX_GC_POLICY; gc_mode++) + if (SIT_I(sbi)->last_victim[gc_mode] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + SIT_I(sbi)->last_victim[gc_mode] = 0; + + for (gc_type = BG_GC; gc_type <= FG_GC; gc_type++) + if (sbi->next_victim_seg[gc_type] >= + MAIN_SECS(sbi) * sbi->segs_per_sec) + sbi->next_victim_seg[gc_type] = NULL_SEGNO; + + mutex_unlock(&DIRTY_I(sbi)->seglist_lock); + + err = free_segment_range(sbi, MAIN_SECS(sbi) * sbi->segs_per_sec, + MAIN_SEGS(sbi) - 1); + if (err) + goto out; + + update_sb_metadata(sbi, -secs); + + err = f2fs_commit_super(sbi, false); + if (err) { + update_sb_metadata(sbi, secs); + goto out; + } + + update_fs_metadata(sbi, -secs); + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + err = f2fs_sync_fs(sbi->sb, 1); + if (err) { + update_fs_metadata(sbi, secs); + update_sb_metadata(sbi, secs); + f2fs_commit_super(sbi, false); + } +out: + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_err(sbi, "resize_fs failed, should run fsck to repair!"); + + MAIN_SECS(sbi) += secs; + spin_lock(&sbi->stat_lock); + sbi->user_block_count += shrunk_blocks; + spin_unlock(&sbi->stat_lock); + } + clear_sbi_flag(sbi, SBI_IS_RESIZEFS); + mutex_unlock(&sbi->resize_mutex); + thaw_bdev(sbi->sb->s_bdev, sbi->sb); + return err; +} diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 404d2462a0fe..3613efca8c00 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -140,11 +140,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (unlikely(dn->data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(dn); set_sbi_flag(fio.sbi, SBI_NEED_FSCK); - f2fs_msg(fio.sbi->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dn->inode->i_ino, dn->data_blkaddr); - return -EINVAL; + f2fs_warn(fio.sbi, "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dn->inode->i_ino, dn->data_blkaddr); + return -EFSCORRUPTED; } f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page)); @@ -383,11 +381,9 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (unlikely(dn.data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(&dn); set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); - f2fs_msg(F2FS_P_SB(page)->sb, KERN_WARNING, - "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, " - "run fsck to fix.", - __func__, dir->i_ino, dn.data_blkaddr); - err = -EINVAL; + f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + __func__, dir->i_ino, dn.data_blkaddr); + err = -EFSCORRUPTED; goto out; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ccb02226dd2c..a33d7a849b2d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -74,7 +74,7 @@ static int __written_first_block(struct f2fs_sb_info *sbi, if (!__is_valid_data_blkaddr(addr)) return 1; if (!f2fs_is_valid_blkaddr(sbi, addr, DATA_GENERIC_ENHANCE)) - return -EFAULT; + return -EFSCORRUPTED; return 0; } @@ -176,9 +176,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) calculated = f2fs_inode_chksum(sbi, page); if (provided != calculated) - f2fs_msg(sbi->sb, KERN_WARNING, - "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page->index, ino_of_node(page), provided, calculated); + f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", + page->index, ino_of_node(page), provided, calculated); return provided == calculated; } @@ -202,50 +201,41 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); if (!iblocks) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, " - "run fsck to fix.", - __func__, inode->i_ino, iblocks); + f2fs_warn(sbi, "%s: corrupted inode i_blocks i_ino=%lx iblocks=%llu, run fsck to fix.", + __func__, inode->i_ino, iblocks); return false; } if (ino_of_node(node_page) != nid_of_node(node_page)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode footer i_ino=%lx, ino,nid: " - "[%u, %u] run fsck to fix.", - __func__, inode->i_ino, - ino_of_node(node_page), nid_of_node(node_page)); + f2fs_warn(sbi, "%s: corrupted inode footer i_ino=%lx, ino,nid: [%u, %u] run fsck to fix.", + __func__, inode->i_ino, + ino_of_node(node_page), nid_of_node(node_page)); return false; } if (f2fs_sb_has_flexible_inline_xattr(sbi) && !f2fs_has_extra_attr(inode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: corrupted inode ino=%lx, run fsck to fix.", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.", + __func__, inode->i_ino); return false; } if (f2fs_has_extra_attr(inode) && !f2fs_sb_has_extra_attr(sbi)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) is with extra_attr, " - "but extra_attr feature is off", - __func__, inode->i_ino); + f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", + __func__, inode->i_ino); return false; } if (fi->i_extra_isize > F2FS_TOTAL_EXTRA_ATTR_SIZE || fi->i_extra_isize % sizeof(__le32)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, " - "max: %zu", - __func__, inode->i_ino, fi->i_extra_isize, - F2FS_TOTAL_EXTRA_ATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_extra_isize: %d, max: %zu", + __func__, inode->i_ino, fi->i_extra_isize, + F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } @@ -255,11 +245,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) (!fi->i_inline_xattr_size || fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) has corrupted " - "i_inline_xattr_size: %d, max: %zu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MAX_INLINE_XATTR_SIZE); return false; } @@ -272,11 +260,9 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) !f2fs_is_valid_blkaddr(sbi, ei->blk + ei->len - 1, DATA_GENERIC_ENHANCE))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx) extent info [%u, %u, %u] " - "is incorrect, run fsck to fix", - __func__, inode->i_ino, - ei->blk, ei->fofs, ei->len); + f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix", + __func__, inode->i_ino, + ei->blk, ei->fofs, ei->len); return false; } } @@ -284,19 +270,15 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) if (f2fs_has_inline_data(inode) && (!S_ISREG(inode->i_mode) && !S_ISLNK(inode->i_mode))) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_data, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_data, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } if (f2fs_has_inline_dentry(inode) && !S_ISDIR(inode->i_mode)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: inode (ino=%lx, mode=%u) should not have " - "inline_dentry, run fsck to fix", - __func__, inode->i_ino, inode->i_mode); + f2fs_warn(sbi, "%s: inode (ino=%lx, mode=%u) should not have inline_dentry, run fsck to fix", + __func__, inode->i_ino, inode->i_mode); return false; } @@ -343,6 +325,8 @@ static int do_read_inode(struct inode *inode) le16_to_cpu(ri->i_gc_failures); fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid); fi->i_flags = le32_to_cpu(ri->i_flags); + if (S_ISREG(inode->i_mode)) + fi->i_flags &= ~F2FS_PROJINHERIT_FL; fi->flags = 0; fi->i_advise = ri->i_advise; fi->i_pino = le32_to_cpu(ri->i_pino); @@ -374,7 +358,7 @@ static int do_read_inode(struct inode *inode) if (!sanity_check_inode(inode, node_page)) { f2fs_put_page(node_page, 1); - return -EINVAL; + return -EFSCORRUPTED; } /* check data exist */ @@ -783,8 +767,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_get_node_info(sbi, inode->i_ino, &ni); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "May loss orphan inode, run fsck to fix."); + f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } @@ -792,8 +775,7 @@ void f2fs_handle_failed_inode(struct inode *inode) err = f2fs_acquire_orphan_inode(sbi); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "Too many orphan inodes, run fsck to fix."); + f2fs_warn(sbi, "Too many orphan inodes, run fsck to fix."); } else { f2fs_add_orphan_inode(inode); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0f77f9242751..c5b99042e6f2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -385,9 +385,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) int err = 0; if (f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "skip recovering inline_dots inode (ino:%lu, pino:%u) " - "in readonly mountpoint", dir->i_ino, pino); + f2fs_info(sbi, "skip recovering inline_dots inode (ino:%lu, pino:%u) in readonly mountpoint", + dir->i_ino, pino); return 0; } @@ -484,9 +483,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { - f2fs_msg(inode->i_sb, KERN_WARNING, - "Inconsistent encryption contexts: %lu/%lu", - dir->i_ino, inode->i_ino); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent encryption contexts: %lu/%lu", + dir->i_ino, inode->i_ino); err = -EPERM; goto out_iput; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 18a038a2a9fa..a18b2a895771 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -34,10 +34,9 @@ int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) { if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: out-of-range nid=%x, run fsck to fix.", - __func__, nid); - return -EINVAL; + f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.", + __func__, nid); + return -EFSCORRUPTED; } return 0; } @@ -1189,10 +1188,8 @@ int f2fs_remove_inode_page(struct inode *inode) } if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) { - f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, - "Inconsistent i_blocks, ino:%lu, iblocks:%llu", - inode->i_ino, - (unsigned long long)inode->i_blocks); + f2fs_warn(F2FS_I_SB(inode), "Inconsistent i_blocks, ino:%lu, iblocks:%llu", + inode->i_ino, (unsigned long long)inode->i_blocks); set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); } @@ -1291,7 +1288,7 @@ static int read_node_page(struct page *page, int op_flags) if (PageUptodate(page)) { if (!f2fs_inode_chksum_verify(sbi, page)) { ClearPageUptodate(page); - return -EBADMSG; + return -EFSBADCRC; } return LOCKED_PAGE; } @@ -1375,16 +1372,15 @@ repeat: } if (!f2fs_inode_chksum_verify(sbi, page)) { - err = -EBADMSG; + err = -EFSBADCRC; goto out_err; } page_hit: if(unlikely(nid != nid_of_node(page))) { - f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, " - "nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); + f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(page)); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1752,9 +1748,8 @@ continue_unlock: break; } if (!ret && atomic && !marked) { - f2fs_msg(sbi->sb, KERN_DEBUG, - "Retry to write fsync mark: ino=%u, idx=%lx", - ino, last_page->index); + f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", + ino, last_page->index); lock_page(last_page); f2fs_wait_on_page_writeback(last_page, NODE, true, true); set_page_dirty(last_page); @@ -2304,8 +2299,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, !mount); - f2fs_msg(sbi->sb, KERN_ERR, - "NAT is corrupt, run fsck to fix it"); + f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } } @@ -2725,7 +2719,7 @@ static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, i = 1; } for (; i < NAT_ENTRY_PER_BLOCK; i++) { - if (nat_blk->entries[i].block_addr != NULL_ADDR) + if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } if (valid == 0) { @@ -2915,7 +2909,7 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) nm_i->full_nat_bits = nm_i->nat_bits + 8; nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint"); + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e04f82b3f4fc..783773e4560d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -188,10 +188,9 @@ out: name = "<encrypted>"; else name = raw_inode->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), name, - IS_ERR(dir) ? 0 : dir->i_ino, err); + f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d", + __func__, ino_of_node(ipage), name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } @@ -292,9 +291,8 @@ static int recover_inode(struct inode *inode, struct page *page) else name = F2FS_INODE(page)->i_name; - f2fs_msg(inode->i_sb, KERN_NOTICE, - "recover_inode: ino = %x, name = %s, inline = %x", - ino_of_node(page), name, raw->i_inline); + f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x", + ino_of_node(page), name, raw->i_inline); return 0; } @@ -371,10 +369,9 @@ next: /* sanity check in order to detect looped node chain */ if (++loop_cnt >= free_blocks || blkaddr == next_blkaddr_of_node(page)) { - f2fs_msg(sbi->sb, KERN_NOTICE, - "%s: detect looped node chain, " - "blkaddr:%u, next:%u", - __func__, blkaddr, next_blkaddr_of_node(page)); + f2fs_notice(sbi, "%s: detect looped node chain, blkaddr:%u, next:%u", + __func__, blkaddr, + next_blkaddr_of_node(page)); f2fs_put_page(page, 1); err = -EINVAL; break; @@ -553,11 +550,10 @@ retry_dn: f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); - err = -EFAULT; + f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", + inode->i_ino, ofs_of_node(dn.node_page), + ofs_of_node(page)); + err = -EFSCORRUPTED; goto err; } @@ -569,13 +565,13 @@ retry_dn: if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } if (__is_valid_data_blkaddr(dest) && !f2fs_is_valid_blkaddr(sbi, dest, META_POR)) { - err = -EFAULT; + err = -EFSCORRUPTED; goto err; } @@ -642,11 +638,9 @@ retry_prev: err: f2fs_put_dnode(&dn); out: - f2fs_msg(sbi->sb, KERN_NOTICE, - "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", - inode->i_ino, - file_keep_isize(inode) ? "keep" : "recover", - recovered, err); + f2fs_notice(sbi, "recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d", + inode->i_ino, file_keep_isize(inode) ? "keep" : "recover", + recovered, err); return err; } @@ -734,8 +728,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) #endif if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_INFO, - "recover fsync data on readonly fs"); + f2fs_info(sbi, "recover fsync data on readonly fs"); sbi->sb->s_flags &= ~SB_RDONLY; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8dee063c833f..a661ac32e829 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -546,9 +546,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) if (test_opt(sbi, DATA_FLUSH)) { struct blk_plug plug; + mutex_lock(&sbi->flush_lock); + blk_start_plug(&plug); f2fs_sync_dirty_inodes(sbi, FILE_INODE); blk_finish_plug(&plug); + + mutex_unlock(&sbi->flush_lock); } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); @@ -869,11 +873,14 @@ void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi) mutex_unlock(&dirty_i->seglist_lock); } -int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) +block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi) { + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg; block_t holes[2] = {0, 0}; /* DATA and NODE */ + block_t unusable; struct seg_entry *se; unsigned int segno; @@ -887,10 +894,20 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi) } mutex_unlock(&dirty_i->seglist_lock); - if (holes[DATA] > ovp || holes[NODE] > ovp) + unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE]; + if (unusable > ovp_holes) + return unusable - ovp_holes; + return 0; +} + +int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable) +{ + int ovp_hole_segs = + (overprovision_segments(sbi) - reserved_segments(sbi)); + if (unusable > F2FS_OPTION(sbi).unusable_cap) return -EAGAIN; if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) && - dirty_segments(sbi) > overprovision_segments(sbi)) + dirty_segments(sbi) > ovp_hole_segs) return -EAGAIN; return 0; } @@ -1480,6 +1497,10 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); + if (dpolicy->timeout != 0 && + f2fs_time_over(sbi, dpolicy->timeout)) + break; + if (dpolicy->io_aware && i < dpolicy->io_aware_gran && !is_idle(sbi, DISCARD_TIME)) { io_interrupted = true; @@ -1740,8 +1761,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, devi = f2fs_target_device_index(sbi, blkstart); if (blkstart < FDEV(devi).start_blk || blkstart > FDEV(devi).end_blk) { - f2fs_msg(sbi->sb, KERN_ERR, "Invalid block %x", - blkstart); + f2fs_err(sbi, "Invalid block %x", blkstart); return -EIO; } blkstart -= FDEV(devi).start_blk; @@ -1754,10 +1774,9 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, if (sector & (bdev_zone_sectors(bdev) - 1) || nr_sects != bdev_zone_sectors(bdev)) { - f2fs_msg(sbi->sb, KERN_ERR, - "(%d) %s: Unaligned zone reset attempted (block %x + %x)", - devi, sbi->s_ndevs ? FDEV(devi).path: "", - blkstart, blklen); + f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)", + devi, sbi->s_ndevs ? FDEV(devi).path : "", + blkstart, blklen); return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); @@ -2121,15 +2140,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_set_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks--; del = 0; @@ -2150,15 +2168,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) mir_exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { - f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error " - "when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", + blkaddr, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Bitmap was wrongly cleared, blk:%u", blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", + blkaddr); f2fs_bug_on(sbi, 1); se->valid_blocks++; del = 0; @@ -2640,6 +2657,39 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, stat_inc_seg_type(sbi, curseg); } +void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, + unsigned int start, unsigned int end) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno; + + down_read(&SM_I(sbi)->curseg_lock); + mutex_lock(&curseg->curseg_mutex); + down_write(&SIT_I(sbi)->sentry_lock); + + segno = CURSEG_I(sbi, type)->segno; + if (segno < start || segno > end) + goto unlock; + + if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type)) + change_curseg(sbi, type); + else + new_curseg(sbi, type, true); + + stat_inc_seg_type(sbi, curseg); + + locate_dirty_segment(sbi, segno); +unlock: + up_write(&SIT_I(sbi)->sentry_lock); + + if (segno != curseg->segno) + f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u", + type, segno, curseg->segno); + + mutex_unlock(&curseg->curseg_mutex); + up_read(&SM_I(sbi)->curseg_lock); +} + void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -2772,9 +2822,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) goto out; if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { - f2fs_msg(sbi->sb, KERN_WARNING, - "Found FS corruption, run fsck to fix."); - return -EIO; + f2fs_warn(sbi, "Found FS corruption, run fsck to fix."); + return -EFSCORRUPTED; } /* start/end segment number in main_area */ @@ -3197,12 +3246,17 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) { set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EFAULT; + f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.", + __func__, segno); + return -EFSCORRUPTED; } stat_inc_inplace_blocks(fio->sbi); - err = f2fs_submit_page_bio(fio); + if (fio->bio) + err = f2fs_merge_page_bio(fio); + else + err = f2fs_submit_page_bio(fio); if (!err) { update_device_state(fio); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); @@ -3393,6 +3447,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) seg_i = CURSEG_I(sbi, i); segno = le32_to_cpu(ckpt->cur_data_segno[i]); blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]); + if (blk_off > ENTRIES_IN_SUM) { + f2fs_bug_on(sbi, 1); + f2fs_put_page(page, 1); + return -EFAULT; + } seg_i->next_segno = segno; reset_curseg(sbi, i, 0); seg_i->alloc_type = ckpt->alloc_type[i]; @@ -3530,8 +3589,11 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) /* sanity check for summary blocks */ if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES || - sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) + sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) { + f2fs_err(sbi, "invalid journal entries nats %u sits %u\n", + nats_in_cursum(nat_j), sits_in_cursum(sit_j)); return -EINVAL; + } return 0; } @@ -3762,7 +3824,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_journal *journal = curseg->journal; struct sit_entry_set *ses, *tmp; struct list_head *head = &SM_I(sbi)->sit_entry_set; - bool to_journal = true; + bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS); struct seg_entry *se; down_write(&sit_i->sentry_lock); @@ -3781,7 +3843,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and add and account * them in sit entry set. */ - if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL)) + if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) || + !to_journal) remove_sits_in_journal(sbi); /* @@ -4096,11 +4159,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) start = le32_to_cpu(segno_in_journal(journal, i)); if (start >= MAIN_SEGS(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong journal entry on segno %u", - start); + f2fs_err(sbi, "Wrong journal entry on segno %u", + start); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; break; } @@ -4137,11 +4199,10 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) up_read(&curseg->journal_rwsem); if (!err && total_node_blocks != valid_node_count(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "SIT is corrupted node# %u vs %u", - total_node_blocks, valid_node_count(sbi)); + f2fs_err(sbi, "SIT is corrupted node# %u vs %u", + total_node_blocks, valid_node_count(sbi)); set_sbi_flag(sbi, SBI_NEED_FSCK); - err = -EINVAL; + err = -EFSCORRUPTED; } return err; @@ -4232,6 +4293,39 @@ static int build_dirty_segmap(struct f2fs_sb_info *sbi) return init_victim_secmap(sbi); } +static int sanity_check_curseg(struct f2fs_sb_info *sbi) +{ + int i; + + /* + * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr; + * In LFS curseg, all blkaddr after .next_blkoff should be unused. + */ + for (i = 0; i < NO_CHECK_TYPE; i++) { + struct curseg_info *curseg = CURSEG_I(sbi, i); + struct seg_entry *se = get_seg_entry(sbi, curseg->segno); + unsigned int blkofs = curseg->next_blkoff; + + if (f2fs_test_bit(blkofs, se->cur_valid_map)) + goto out; + + if (curseg->alloc_type == SSR) + continue; + + for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) { + if (!f2fs_test_bit(blkofs, se->cur_valid_map)) + continue; +out: + f2fs_err(sbi, + "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u", + i, curseg->segno, curseg->alloc_type, + curseg->next_blkoff, blkofs); + return -EFSCORRUPTED; + } + } + return 0; +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -4327,6 +4421,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) if (err) return err; + err = sanity_check_curseg(sbi); + if (err) + return err; + init_min_max_mtime(sbi); return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 429007b8036e..b74602813a05 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -109,7 +109,7 @@ #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ - ((MAIN_SEGS(sbi) + SIT_ENTRY_PER_BLOCK - 1) / SIT_ENTRY_PER_BLOCK) + DIV_ROUND_UP(MAIN_SEGS(sbi), SIT_ENTRY_PER_BLOCK) #define f2fs_bitmap_size(nr) \ (BITS_TO_LONGS(nr) * sizeof(unsigned long)) @@ -693,21 +693,19 @@ static inline int check_block_count(struct f2fs_sb_info *sbi, } while (cur_pos < sbi->blocks_per_seg); if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Mismatch valid blocks %d vs. %d", - GET_SIT_VBLOCKS(raw_sit), valid_blocks); + f2fs_err(sbi, "Mismatch valid blocks %d vs. %d", + GET_SIT_VBLOCKS(raw_sit), valid_blocks); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } /* check segment usage, and check boundary of a given segment number */ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg || segno > TOTAL_SEGS(sbi) - 1)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid blocks %d or segno %u", - GET_SIT_VBLOCKS(raw_sit), segno); + f2fs_err(sbi, "Wrong valid blocks %d or segno %u", + GET_SIT_VBLOCKS(raw_sit), segno); set_sbi_flag(sbi, SBI_NEED_FSCK); - return -EINVAL; + return -EFSCORRUPTED; } return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6b959bbb336a..d95a681ef7c9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -136,7 +136,10 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, - Opt_checkpoint, + Opt_checkpoint_disable, + Opt_checkpoint_disable_cap, + Opt_checkpoint_disable_cap_perc, + Opt_checkpoint_enable, Opt_err, }; @@ -195,45 +198,52 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, - {Opt_checkpoint, "checkpoint=%s"}, + {Opt_checkpoint_disable, "checkpoint=disable"}, + {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, + {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, + {Opt_checkpoint_enable, "checkpoint=enable"}, {Opt_err, NULL}, }; -void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) +void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) { struct va_format vaf; va_list args; + int level; va_start(args, fmt); - vaf.fmt = fmt; + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); vaf.va = &args; - printk("%sF2FS-fs (%s): %pV\n", level, sb->s_id, &vaf); + printk("%c%cF2FS-fs (%s): %pV\n", + KERN_SOH_ASCII, level, sbi->sb->s_id, &vaf); + va_end(args); } static inline void limit_reserve_root(struct f2fs_sb_info *sbi) { - block_t limit = (sbi->user_block_count << 1) / 1000; + block_t limit = min((sbi->user_block_count << 1) / 1000, + sbi->user_block_count - sbi->reserved_blocks); /* limit is 0.2% */ if (test_opt(sbi, RESERVE_ROOT) && F2FS_OPTION(sbi).root_reserved_blocks > limit) { F2FS_OPTION(sbi).root_reserved_blocks = limit; - f2fs_msg(sbi->sb, KERN_INFO, - "Reduce reserved blocks for root = %u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Reduce reserved blocks for root = %u", + F2FS_OPTION(sbi).root_reserved_blocks); } if (!test_opt(sbi, RESERVE_ROOT) && (!uid_eq(F2FS_OPTION(sbi).s_resuid, make_kuid(&init_user_ns, F2FS_DEF_RESUID)) || !gid_eq(F2FS_OPTION(sbi).s_resgid, make_kgid(&init_user_ns, F2FS_DEF_RESGID)))) - f2fs_msg(sbi->sb, KERN_INFO, - "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", - from_kuid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resuid), - from_kgid_munged(&init_user_ns, - F2FS_OPTION(sbi).s_resgid)); + f2fs_info(sbi, "Ignore s_resuid=%u, s_resgid=%u w/o reserve_root", + from_kuid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resuid), + from_kgid_munged(&init_user_ns, + F2FS_OPTION(sbi).s_resgid)); } static void init_once(void *foo) @@ -254,35 +264,29 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype, int ret = -EINVAL; if (sb_any_quota_loaded(sb) && !F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, - "Cannot change journaled " - "quota options when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } if (f2fs_sb_has_quota_ino(sbi)) { - f2fs_msg(sb, KERN_INFO, - "QUOTA feature is enabled, so ignore qf_name"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore qf_name"); return 0; } qname = match_strdup(args); if (!qname) { - f2fs_msg(sb, KERN_ERR, - "Not enough memory for storing quotafile name"); + f2fs_err(sbi, "Not enough memory for storing quotafile name"); return -ENOMEM; } if (F2FS_OPTION(sbi).s_qf_names[qtype]) { if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0) ret = 0; else - f2fs_msg(sb, KERN_ERR, - "%s quota file already specified", + f2fs_err(sbi, "%s quota file already specified", QTYPE2NAME(qtype)); goto errout; } if (strchr(qname, '/')) { - f2fs_msg(sb, KERN_ERR, - "quotafile must be on filesystem root"); + f2fs_err(sbi, "quotafile must be on filesystem root"); goto errout; } F2FS_OPTION(sbi).s_qf_names[qtype] = qname; @@ -298,8 +302,7 @@ static int f2fs_clear_qf_name(struct super_block *sb, int qtype) struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { - f2fs_msg(sb, KERN_ERR, "Cannot change journaled quota options" - " when quota turned on"); + f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); return -EINVAL; } kvfree(F2FS_OPTION(sbi).s_qf_names[qtype]); @@ -315,8 +318,7 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) * to support legacy quotas in quota files. */ if (test_opt(sbi, PRJQUOTA) && !f2fs_sb_has_project_quota(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, "Project quota feature not enabled. " - "Cannot enable project quota enforcement."); + f2fs_err(sbi, "Project quota feature not enabled. Cannot enable project quota enforcement."); return -1; } if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] || @@ -336,21 +338,18 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) if (test_opt(sbi, GRPQUOTA) || test_opt(sbi, USRQUOTA) || test_opt(sbi, PRJQUOTA)) { - f2fs_msg(sbi->sb, KERN_ERR, "old and new quota " - "format mixing"); + f2fs_err(sbi, "old and new quota format mixing"); return -1; } if (!F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_ERR, "journaled quota format " - "not specified"); + f2fs_err(sbi, "journaled quota format not specified"); return -1; } } if (f2fs_sb_has_quota_ino(sbi) && F2FS_OPTION(sbi).s_jquota_fmt) { - f2fs_msg(sbi->sb, KERN_INFO, - "QUOTA feature is enabled, so ignore jquota_fmt"); + f2fs_info(sbi, "QUOTA feature is enabled, so ignore jquota_fmt"); F2FS_OPTION(sbi).s_jquota_fmt = 0; } return 0; @@ -418,8 +417,7 @@ static int parse_options(struct super_block *sb, char *options) break; case Opt_nodiscard: if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "discard is required for zoned block devices"); + f2fs_warn(sbi, "discard is required for zoned block devices"); return -EINVAL; } clear_opt(sbi, DISCARD); @@ -451,20 +449,16 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_user_xattr: - f2fs_msg(sb, KERN_INFO, - "user_xattr options not supported"); + f2fs_info(sbi, "user_xattr options not supported"); break; case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); + f2fs_info(sbi, "nouser_xattr options not supported"); break; case Opt_inline_xattr: - f2fs_msg(sb, KERN_INFO, - "inline_xattr options not supported"); + f2fs_info(sbi, "inline_xattr options not supported"); break; case Opt_noinline_xattr: - f2fs_msg(sb, KERN_INFO, - "noinline_xattr options not supported"); + f2fs_info(sbi, "noinline_xattr options not supported"); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -476,10 +470,10 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_acl: - f2fs_msg(sb, KERN_INFO, "acl options not supported"); + f2fs_info(sbi, "acl options not supported"); break; case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + f2fs_info(sbi, "noacl options not supported"); break; #endif case Opt_active_logs: @@ -529,9 +523,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (test_opt(sbi, RESERVE_ROOT)) { - f2fs_msg(sb, KERN_INFO, - "Preserve previous reserve_root=%u", - F2FS_OPTION(sbi).root_reserved_blocks); + f2fs_info(sbi, "Preserve previous reserve_root=%u", + F2FS_OPTION(sbi).root_reserved_blocks); } else { F2FS_OPTION(sbi).root_reserved_blocks = arg; set_opt(sbi, RESERVE_ROOT); @@ -542,8 +535,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; uid = make_kuid(current_user_ns(), arg); if (!uid_valid(uid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid uid value %d", arg); + f2fs_err(sbi, "Invalid uid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resuid = uid; @@ -553,8 +545,7 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; gid = make_kgid(current_user_ns(), arg); if (!gid_valid(gid)) { - f2fs_msg(sb, KERN_ERR, - "Invalid gid value %d", arg); + f2fs_err(sbi, "Invalid gid value %d", arg); return -EINVAL; } F2FS_OPTION(sbi).s_resgid = gid; @@ -567,9 +558,7 @@ static int parse_options(struct super_block *sb, char *options) if (strlen(name) == 8 && !strncmp(name, "adaptive", 8)) { if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_WARNING, - "adaptive mode is not allowed with " - "zoned block device feature"); + f2fs_warn(sbi, "adaptive mode is not allowed with zoned block device feature"); kvfree(name); return -EINVAL; } @@ -587,9 +576,8 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { - f2fs_msg(sb, KERN_WARNING, - "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + f2fs_warn(sbi, "Not support %d, larger than %d", + 1 << arg, BIO_MAX_PAGES); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; @@ -610,13 +598,11 @@ static int parse_options(struct super_block *sb, char *options) break; #else case Opt_fault_injection: - f2fs_msg(sb, KERN_INFO, - "fault_injection options not supported"); + f2fs_info(sbi, "fault_injection options not supported"); break; case Opt_fault_type: - f2fs_msg(sb, KERN_INFO, - "fault_type options not supported"); + f2fs_info(sbi, "fault_type options not supported"); break; #endif case Opt_lazytime: @@ -696,8 +682,7 @@ static int parse_options(struct super_block *sb, char *options) case Opt_jqfmt_vfsv0: case Opt_jqfmt_vfsv1: case Opt_noquota: - f2fs_msg(sb, KERN_INFO, - "quota operations not supported"); + f2fs_info(sbi, "quota operations not supported"); break; #endif case Opt_whint: @@ -759,39 +744,44 @@ static int parse_options(struct super_block *sb, char *options) case Opt_test_dummy_encryption: #ifdef CONFIG_FS_ENCRYPTION if (!f2fs_sb_has_encrypt(sbi)) { - f2fs_msg(sb, KERN_ERR, "Encrypt feature is off"); + f2fs_err(sbi, "Encrypt feature is off"); return -EINVAL; } F2FS_OPTION(sbi).test_dummy_encryption = true; - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mode enabled"); + f2fs_info(sbi, "Test dummy encryption mode enabled"); #else - f2fs_msg(sb, KERN_INFO, - "Test dummy encryption mount option ignored"); + f2fs_info(sbi, "Test dummy encryption mount option ignored"); #endif break; - case Opt_checkpoint: - name = match_strdup(&args[0]); - if (!name) - return -ENOMEM; - - if (strlen(name) == 6 && - !strncmp(name, "enable", 6)) { - clear_opt(sbi, DISABLE_CHECKPOINT); - } else if (strlen(name) == 7 && - !strncmp(name, "disable", 7)) { - set_opt(sbi, DISABLE_CHECKPOINT); - } else { - kvfree(name); + case Opt_checkpoint_disable_cap_perc: + if (args->from && match_int(args, &arg)) return -EINVAL; - } - kvfree(name); + if (arg < 0 || arg > 100) + return -EINVAL; + if (arg == 100) + F2FS_OPTION(sbi).unusable_cap = + sbi->user_block_count; + else + F2FS_OPTION(sbi).unusable_cap = + (sbi->user_block_count / 100) * arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable_cap: + if (args->from && match_int(args, &arg)) + return -EINVAL; + F2FS_OPTION(sbi).unusable_cap = arg; + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_disable: + set_opt(sbi, DISABLE_CHECKPOINT); + break; + case Opt_checkpoint_enable: + clear_opt(sbi, DISABLE_CHECKPOINT); break; default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); + f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", + p); return -EINVAL; } } @@ -800,23 +790,18 @@ static int parse_options(struct super_block *sb, char *options) return -EINVAL; #else if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sbi->sb, KERN_INFO, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + f2fs_info(sbi, "Filesystem with quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } if (f2fs_sb_has_project_quota(sbi) && !f2fs_readonly(sbi->sb)) { - f2fs_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be " - "mounted RDWR without CONFIG_QUOTA"); + f2fs_err(sbi, "Filesystem with project quota feature cannot be mounted RDWR without CONFIG_QUOTA"); return -EINVAL; } #endif if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "Should set mode=lfs with %uKB-sized IO", - F2FS_IO_SIZE_KB(sbi)); + f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO", + F2FS_IO_SIZE_KB(sbi)); return -EINVAL; } @@ -825,15 +810,11 @@ static int parse_options(struct super_block *sb, char *options) if (!f2fs_sb_has_extra_attr(sbi) || !f2fs_sb_has_flexible_inline_xattr(sbi)) { - f2fs_msg(sb, KERN_ERR, - "extra_attr or flexible_inline_xattr " - "feature is off"); + f2fs_err(sbi, "extra_attr or flexible_inline_xattr feature is off"); return -EINVAL; } if (!test_opt(sbi, INLINE_XATTR)) { - f2fs_msg(sb, KERN_ERR, - "inline_xattr_size option should be " - "set with inline_xattr option"); + f2fs_err(sbi, "inline_xattr_size option should be set with inline_xattr option"); return -EINVAL; } @@ -842,16 +823,14 @@ static int parse_options(struct super_block *sb, char *options) if (F2FS_OPTION(sbi).inline_xattr_size < min_size || F2FS_OPTION(sbi).inline_xattr_size > max_size) { - f2fs_msg(sb, KERN_ERR, - "inline xattr size is out of range: %d ~ %d", - min_size, max_size); + f2fs_err(sbi, "inline xattr size is out of range: %d ~ %d", + min_size, max_size); return -EINVAL; } } if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) { - f2fs_msg(sb, KERN_ERR, - "LFS not compatible with checkpoint=disable\n"); + f2fs_err(sbi, "LFS not compatible with checkpoint=disable\n"); return -EINVAL; } @@ -1313,6 +1292,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); + else + seq_puts(seq, ",nodiscard"); if (test_opt(sbi, NOHEAP)) seq_puts(seq, ",no_heap"); else @@ -1409,8 +1390,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",alloc_mode=%s", "reuse"); if (test_opt(sbi, DISABLE_CHECKPOINT)) - seq_puts(seq, ",checkpoint=disable"); - + seq_printf(seq, ",checkpoint=disable:%u", + F2FS_OPTION(sbi).unusable_cap); if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX) seq_printf(seq, ",fsync_mode=%s", "posix"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) @@ -1439,6 +1420,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, EXTENT_CACHE); set_opt(sbi, NOHEAP); clear_opt(sbi, DISABLE_CHECKPOINT); + F2FS_OPTION(sbi).unusable_cap = 0; sbi->sb->s_flags |= SB_LAZYTIME; set_opt(sbi, FLUSH_MERGE); set_opt(sbi, DISCARD); @@ -1467,10 +1449,10 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) struct cp_control cpc; int err = 0; int ret; + block_t unusable; if (s_flags & SB_RDONLY) { - f2fs_msg(sbi->sb, KERN_ERR, - "checkpoint=disable on readonly fs"); + f2fs_err(sbi, "checkpoint=disable on readonly fs"); return -EINVAL; } sbi->sb->s_flags |= SB_ACTIVE; @@ -1494,7 +1476,8 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - if (f2fs_disable_cp_again(sbi)) { + unusable = f2fs_get_unusable_blocks(sbi); + if (f2fs_disable_cp_again(sbi, unusable)) { err = -EAGAIN; goto restore_flag; } @@ -1507,7 +1490,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto out_unlock; spin_lock(&sbi->stat_lock); - sbi->unusable_block_count = 0; + sbi->unusable_block_count = unusable; spin_unlock(&sbi->stat_lock); out_unlock: @@ -1572,8 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* recover superblocks we couldn't write due to previous RO mount */ if (!(*flags & SB_RDONLY) && is_sbi_flag_set(sbi, SBI_NEED_SB_WRITE)) { err = f2fs_commit_super(sbi, false); - f2fs_msg(sb, KERN_INFO, - "Try to recover all the superblocks, ret: %d", err); + f2fs_info(sbi, "Try to recover all the superblocks, ret: %d", + err); if (!err) clear_sbi_flag(sbi, SBI_NEED_SB_WRITE); } @@ -1614,15 +1597,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* disallow enable/disable extent_cache dynamically */ if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "switch extent_cache option is not allowed"); + f2fs_warn(sbi, "switch extent_cache option is not allowed"); goto restore_opts; } if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; - f2fs_msg(sbi->sb, KERN_WARNING, - "disabling checkpoint not compatible with read-only"); + f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); goto restore_opts; } @@ -1692,8 +1673,7 @@ skip: restore_gc: if (need_restart_gc) { if (f2fs_start_gc_thread(sbi)) - f2fs_msg(sbi->sb, KERN_WARNING, - "background gc thread has stopped"); + f2fs_warn(sbi, "background gc thread has stopped"); } else if (need_stop_gc) { f2fs_stop_gc_thread(sbi); } @@ -1832,8 +1812,7 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode) static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type) { if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sbi->sb, KERN_ERR, - "quota sysfile may be corrupted, skip loading it"); + f2fs_err(sbi, "quota sysfile may be corrupted, skip loading it"); return 0; } @@ -1849,8 +1828,7 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) if (f2fs_sb_has_quota_ino(sbi) && rdonly) { err = f2fs_enable_quotas(sbi->sb); if (err) { - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quota_ino: %d", err); + f2fs_err(sbi, "Cannot turn on quota_ino: %d", err); return 0; } return 1; @@ -1863,8 +1841,8 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly) enabled = 1; continue; } - f2fs_msg(sbi->sb, KERN_ERR, - "Cannot turn on quotas: %d on %d", err, i); + f2fs_err(sbi, "Cannot turn on quotas: %d on %d", + err, i); } } return enabled; @@ -1885,8 +1863,7 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, qf_inode = f2fs_iget(sb, qf_inum); if (IS_ERR(qf_inode)) { - f2fs_msg(sb, KERN_ERR, - "Bad quota inode %u:%lu", type, qf_inum); + f2fs_err(F2FS_SB(sb), "Bad quota inode %u:%lu", type, qf_inum); return PTR_ERR(qf_inode); } @@ -1899,17 +1876,17 @@ static int f2fs_quota_enable(struct super_block *sb, int type, int format_id, static int f2fs_enable_quotas(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int type, err = 0; unsigned long qf_inum; bool quota_mopt[MAXQUOTAS] = { - test_opt(F2FS_SB(sb), USRQUOTA), - test_opt(F2FS_SB(sb), GRPQUOTA), - test_opt(F2FS_SB(sb), PRJQUOTA), + test_opt(sbi, USRQUOTA), + test_opt(sbi, GRPQUOTA), + test_opt(sbi, PRJQUOTA), }; if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) { - f2fs_msg(sb, KERN_ERR, - "quota file may be corrupted, skip loading it"); + f2fs_err(sbi, "quota file may be corrupted, skip loading it"); return 0; } @@ -1922,10 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb) DQUOT_USAGE_ENABLED | (quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0)); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "fsck to fix.", type, err); + f2fs_err(sbi, "Failed to enable quota tracking (type=%d, err=%d). Please run fsck to fix.", + type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); set_sbi_flag(F2FS_SB(sb), @@ -1944,6 +1919,18 @@ int f2fs_quota_sync(struct super_block *sb, int type) int cnt; int ret; + /* + * do_quotactl + * f2fs_quota_sync + * down_read(quota_sem) + * dquot_writeback_dquots() + * f2fs_dquot_commit + * block_operation + * down_read(quota_sem) + */ + f2fs_lock_op(sbi); + + down_read(&sbi->quota_sem); ret = dquot_writeback_dquots(sb, type); if (ret) goto out; @@ -1981,6 +1968,8 @@ int f2fs_quota_sync(struct super_block *sb, int type) out: if (ret) set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); + f2fs_unlock_op(sbi); return ret; } @@ -2045,10 +2034,8 @@ void f2fs_quota_off_umount(struct super_block *sb) if (err) { int ret = dquot_quota_off(sb, type); - f2fs_msg(sb, KERN_ERR, - "Fail to turn off disk quota " - "(type: %d, err: %d, ret:%d), Please " - "run fsck to fix it.", type, err, ret); + f2fs_err(F2FS_SB(sb), "Fail to turn off disk quota (type: %d, err: %d, ret:%d), Please run fsck to fix it.", + type, err, ret); set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); } } @@ -2074,32 +2061,40 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb) static int f2fs_dquot_commit(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_acquire(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); - + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_release(struct dquot *dquot) { + struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_release(dquot); if (ret < 0) - set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -2109,22 +2104,27 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); + up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); int ret; + down_read(&sbi->quota_sem); ret = dquot_commit_info(sb, type); if (ret < 0) - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); + set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + up_read(&sbi->quota_sem); return ret; } @@ -2341,55 +2341,49 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, (segment_count << log_blocks_per_seg); if (segment0_blkaddr != cp_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Mismatch start address, segment0(%u) cp_blkaddr(%u)", - segment0_blkaddr, cp_blkaddr); + f2fs_info(sbi, "Mismatch start address, segment0(%u) cp_blkaddr(%u)", + segment0_blkaddr, cp_blkaddr); return true; } if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) != sit_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong CP boundary, start(%u) end(%u) blocks(%u)", - cp_blkaddr, sit_blkaddr, - segment_count_ckpt << log_blocks_per_seg); + f2fs_info(sbi, "Wrong CP boundary, start(%u) end(%u) blocks(%u)", + cp_blkaddr, sit_blkaddr, + segment_count_ckpt << log_blocks_per_seg); return true; } if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) != nat_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", - sit_blkaddr, nat_blkaddr, - segment_count_sit << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SIT boundary, start(%u) end(%u) blocks(%u)", + sit_blkaddr, nat_blkaddr, + segment_count_sit << log_blocks_per_seg); return true; } if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) != ssa_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", - nat_blkaddr, ssa_blkaddr, - segment_count_nat << log_blocks_per_seg); + f2fs_info(sbi, "Wrong NAT boundary, start(%u) end(%u) blocks(%u)", + nat_blkaddr, ssa_blkaddr, + segment_count_nat << log_blocks_per_seg); return true; } if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) != main_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", - ssa_blkaddr, main_blkaddr, - segment_count_ssa << log_blocks_per_seg); + f2fs_info(sbi, "Wrong SSA boundary, start(%u) end(%u) blocks(%u)", + ssa_blkaddr, main_blkaddr, + segment_count_ssa << log_blocks_per_seg); return true; } if (main_end_blkaddr > seg_end_blkaddr) { - f2fs_msg(sb, KERN_INFO, - "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", - main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)", + main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); return true; } else if (main_end_blkaddr < seg_end_blkaddr) { int err = 0; @@ -2406,12 +2400,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, err = __f2fs_commit_super(bh, NULL); res = err ? "failed" : "done"; } - f2fs_msg(sb, KERN_INFO, - "Fix alignment : %s, start(%u) end(%u) block(%u)", - res, main_blkaddr, - segment0_blkaddr + - (segment_count << log_blocks_per_seg), - segment_count_main << log_blocks_per_seg); + f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)", + res, main_blkaddr, + segment0_blkaddr + + (segment_count << log_blocks_per_seg), + segment_count_main << log_blocks_per_seg); if (err) return true; } @@ -2425,7 +2418,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, block_t total_sections, blocks_per_seg; struct f2fs_super_block *raw_super = (struct f2fs_super_block *) (bh->b_data + F2FS_SUPER_OFFSET); - struct super_block *sb = sbi->sb; unsigned int blocksize; size_t crc_offset = 0; __u32 crc = 0; @@ -2435,48 +2427,42 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, crc_offset = le32_to_cpu(raw_super->checksum_offset); if (crc_offset != offsetof(struct f2fs_super_block, crc)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum offset: %zu", - crc_offset); + f2fs_info(sbi, "Invalid SB checksum offset: %zu", + crc_offset); return 1; } crc = le32_to_cpu(raw_super->crc); if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { - f2fs_msg(sb, KERN_INFO, - "Invalid SB checksum value: %u", crc); + f2fs_info(sbi, "Invalid SB checksum value: %u", crc); return 1; } } if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) { - f2fs_msg(sb, KERN_INFO, - "Magic Mismatch, valid(0x%x) - read(0x%x)", - F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); + f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)", + F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic)); return 1; } /* Currently, support only 4KB page cache size */ if (F2FS_BLKSIZE != PAGE_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid page_cache_size (%lu), supports only 4KB", - PAGE_SIZE); + f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", + PAGE_SIZE); return 1; } /* Currently, support only 4KB block size */ blocksize = 1 << le32_to_cpu(raw_super->log_blocksize); if (blocksize != F2FS_BLKSIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid blocksize (%u), supports only 4KB", - blocksize); + f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", + blocksize); return 1; } /* check log blocks per segment */ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { - f2fs_msg(sb, KERN_INFO, - "Invalid log blocks per segment (%u)", - le32_to_cpu(raw_super->log_blocks_per_seg)); + f2fs_info(sbi, "Invalid log blocks per segment (%u)", + le32_to_cpu(raw_super->log_blocks_per_seg)); return 1; } @@ -2485,17 +2471,16 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, F2FS_MAX_LOG_SECTOR_SIZE || le32_to_cpu(raw_super->log_sectorsize) < F2FS_MIN_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, "Invalid log sectorsize (%u)", - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectorsize (%u)", + le32_to_cpu(raw_super->log_sectorsize)); return 1; } if (le32_to_cpu(raw_super->log_sectors_per_block) + le32_to_cpu(raw_super->log_sectorsize) != F2FS_MAX_LOG_SECTOR_SIZE) { - f2fs_msg(sb, KERN_INFO, - "Invalid log sectors per block(%u) log sectorsize(%u)", - le32_to_cpu(raw_super->log_sectors_per_block), - le32_to_cpu(raw_super->log_sectorsize)); + f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", + le32_to_cpu(raw_super->log_sectors_per_block), + le32_to_cpu(raw_super->log_sectorsize)); return 1; } @@ -2509,59 +2494,51 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (segment_count > F2FS_MAX_SEGMENT || segment_count < F2FS_MIN_SEGMENTS) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment count (%u)", - segment_count); + f2fs_info(sbi, "Invalid segment count (%u)", segment_count); return 1; } if (total_sections > segment_count || total_sections < F2FS_MIN_SEGMENTS || segs_per_sec > segment_count || !segs_per_sec) { - f2fs_msg(sb, KERN_INFO, - "Invalid segment/section count (%u, %u x %u)", - segment_count, total_sections, segs_per_sec); + f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", + segment_count, total_sections, segs_per_sec); return 1; } if ((segment_count / segs_per_sec) < total_sections) { - f2fs_msg(sb, KERN_INFO, - "Small segment_count (%u < %u * %u)", - segment_count, segs_per_sec, total_sections); + f2fs_info(sbi, "Small segment_count (%u < %u * %u)", + segment_count, segs_per_sec, total_sections); return 1; } if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { - f2fs_msg(sb, KERN_INFO, - "Wrong segment_count / block_count (%u > %llu)", - segment_count, le64_to_cpu(raw_super->block_count)); + f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", + segment_count, le64_to_cpu(raw_super->block_count)); return 1; } if (secs_per_zone > total_sections || !secs_per_zone) { - f2fs_msg(sb, KERN_INFO, - "Wrong secs_per_zone / total_sections (%u, %u)", - secs_per_zone, total_sections); + f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", + secs_per_zone, total_sections); return 1; } if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || raw_super->hot_ext_count > F2FS_MAX_EXTENSION || (le32_to_cpu(raw_super->extension_count) + raw_super->hot_ext_count) > F2FS_MAX_EXTENSION) { - f2fs_msg(sb, KERN_INFO, - "Corrupted extension count (%u + %u > %u)", - le32_to_cpu(raw_super->extension_count), - raw_super->hot_ext_count, - F2FS_MAX_EXTENSION); + f2fs_info(sbi, "Corrupted extension count (%u + %u > %u)", + le32_to_cpu(raw_super->extension_count), + raw_super->hot_ext_count, + F2FS_MAX_EXTENSION); return 1; } if (le32_to_cpu(raw_super->cp_payload) > (blocks_per_seg - F2FS_CP_PACKS)) { - f2fs_msg(sb, KERN_INFO, - "Insane cp_payload (%u > %u)", - le32_to_cpu(raw_super->cp_payload), - blocks_per_seg - F2FS_CP_PACKS); + f2fs_info(sbi, "Insane cp_payload (%u > %u)", + le32_to_cpu(raw_super->cp_payload), + blocks_per_seg - F2FS_CP_PACKS); return 1; } @@ -2569,11 +2546,10 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, if (le32_to_cpu(raw_super->node_ino) != 1 || le32_to_cpu(raw_super->meta_ino) != 2 || le32_to_cpu(raw_super->root_ino) != 3) { - f2fs_msg(sb, KERN_INFO, - "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", - le32_to_cpu(raw_super->node_ino), - le32_to_cpu(raw_super->meta_ino), - le32_to_cpu(raw_super->root_ino)); + f2fs_info(sbi, "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)", + le32_to_cpu(raw_super->node_ino), + le32_to_cpu(raw_super->meta_ino), + le32_to_cpu(raw_super->root_ino)); return 1; } @@ -2617,8 +2593,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (unlikely(fsmeta < F2FS_MIN_SEGMENTS || ovp_segments == 0 || reserved_segments == 0)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong layout: check mkfs.f2fs version"); + f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version"); return 1; } @@ -2627,16 +2602,15 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg); if (!user_block_count || user_block_count >= segment_count_main << log_blocks_per_seg) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong user_block_count: %u", user_block_count); + f2fs_err(sbi, "Wrong user_block_count: %u", + user_block_count); return 1; } valid_user_blocks = le64_to_cpu(ckpt->valid_block_count); if (valid_user_blocks > user_block_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_user_blocks: %u, user_block_count: %u", - valid_user_blocks, user_block_count); + f2fs_err(sbi, "Wrong valid_user_blocks: %u, user_block_count: %u", + valid_user_blocks, user_block_count); return 1; } @@ -2644,9 +2618,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) avail_node_count = sbi->total_node_count - sbi->nquota_files - F2FS_RESERVED_NODE_NUM; if (valid_node_count > avail_node_count) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong valid_node_count: %u, avail_node_count: %u", - valid_node_count, avail_node_count); + f2fs_err(sbi, "Wrong valid_node_count: %u, avail_node_count: %u", + valid_node_count, avail_node_count); return 1; } @@ -2660,10 +2633,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_node_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Node segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Node segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2675,10 +2647,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u, %u) has the same " - "segno: %u", i, j, - le32_to_cpu(ckpt->cur_data_segno[i])); + f2fs_err(sbi, "Data segment (%u, %u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); return 1; } } @@ -2687,10 +2658,9 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { if (le32_to_cpu(ckpt->cur_node_segno[i]) == le32_to_cpu(ckpt->cur_data_segno[j])) { - f2fs_msg(sbi->sb, KERN_ERR, - "Data segment (%u) and Data segment (%u)" - " has the same segno: %u", i, j, - le32_to_cpu(ckpt->cur_node_segno[i])); + f2fs_err(sbi, "Data segment (%u) and Data segment (%u) has the same segno: %u", + i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); return 1; } } @@ -2701,9 +2671,8 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (sit_bitmap_size != ((sit_segs / 2) << log_blocks_per_seg) / 8 || nat_bitmap_size != ((nat_segs / 2) << log_blocks_per_seg) / 8) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong bitmap size: sit: %u, nat:%u", - sit_bitmap_size, nat_bitmap_size); + f2fs_err(sbi, "Wrong bitmap size: sit: %u, nat:%u", + sit_bitmap_size, nat_bitmap_size); return 1; } @@ -2712,14 +2681,22 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) if (cp_pack_start_sum < cp_payload + 1 || cp_pack_start_sum > blocks_per_seg - 1 - NR_CURSEG_TYPE) { - f2fs_msg(sbi->sb, KERN_ERR, - "Wrong cp_pack_start_sum: %u", - cp_pack_start_sum); + f2fs_err(sbi, "Wrong cp_pack_start_sum: %u", + cp_pack_start_sum); + return 1; + } + + if (__is_set_ckpt_flags(ckpt, CP_LARGE_NAT_BITMAP_FLAG) && + le32_to_cpu(ckpt->checksum_offset) != CP_MIN_CHKSUM_OFFSET) { + f2fs_warn(sbi, "using deprecated layout of large_nat_bitmap, " + "please run fsck v1.13.0 or higher to repair, chksum_offset: %u, " + "fixed with patch: \"f2fs-tools: relocate chksum_offset for large_nat_bitmap feature\"", + le32_to_cpu(ckpt->checksum_offset)); return 1; } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck"); + f2fs_err(sbi, "A bug case: need to run fsck"); return 1; } return 0; @@ -2888,18 +2865,17 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, for (block = 0; block < 2; block++) { bh = sb_bread(sb, block); if (!bh) { - f2fs_msg(sb, KERN_ERR, "Unable to read %dth superblock", - block + 1); + f2fs_err(sbi, "Unable to read %dth superblock", + block + 1); err = -EIO; continue; } /* sanity checking of raw super */ if (sanity_check_raw_super(sbi, bh)) { - f2fs_msg(sb, KERN_ERR, - "Can't find valid F2FS filesystem in %dth superblock", - block + 1); - err = -EINVAL; + f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", + block + 1); + err = -EFSCORRUPTED; brelse(bh); continue; } @@ -3028,36 +3004,32 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Zoned block device feature not enabled\n"); + f2fs_err(sbi, "Zoned block device feature not enabled\n"); return -EINVAL; } if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) { if (init_blkz_info(sbi, i)) { - f2fs_msg(sbi->sb, KERN_ERR, - "Failed to initialize F2FS blkzone information"); + f2fs_err(sbi, "Failed to initialize F2FS blkzone information"); return -EINVAL; } if (max_devices == 1) break; - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk, - bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? - "Host-aware" : "Host-managed"); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk, + bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ? + "Host-aware" : "Host-managed"); continue; } #endif - f2fs_msg(sbi->sb, KERN_INFO, - "Mount Device [%2d]: %20s, %8u, %8x - %8x", - i, FDEV(i).path, - FDEV(i).total_segments, - FDEV(i).start_blk, FDEV(i).end_blk); - } - f2fs_msg(sbi->sb, KERN_INFO, - "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); + f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x", + i, FDEV(i).path, + FDEV(i).total_segments, + FDEV(i).start_blk, FDEV(i).end_blk); + } + f2fs_info(sbi, + "IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi)); return 0; } @@ -3103,7 +3075,7 @@ try_onemore: /* Load the checksum driver */ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver."); + f2fs_err(sbi, "Cannot load crc32 driver."); err = PTR_ERR(sbi->s_chksum_driver); sbi->s_chksum_driver = NULL; goto free_sbi; @@ -3111,7 +3083,7 @@ try_onemore: /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { - f2fs_msg(sb, KERN_ERR, "unable to set blocksize"); + f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -3135,8 +3107,7 @@ try_onemore: */ #ifndef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi)) { - f2fs_msg(sb, KERN_ERR, - "Zoned block device support is not enabled"); + f2fs_err(sbi, "Zoned block device support is not enabled"); err = -EOPNOTSUPP; goto free_sb_buf; } @@ -3160,10 +3131,7 @@ try_onemore: #ifdef CONFIG_QUOTA sb->dq_op = &f2fs_quota_operations; - if (f2fs_sb_has_quota_ino(sbi)) - sb->s_qcop = &dquot_quotactl_sysfile_ops; - else - sb->s_qcop = &f2fs_quotactl_ops; + sb->s_qcop = &f2fs_quotactl_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; if (f2fs_sb_has_quota_ino(sbi)) { @@ -3192,6 +3160,7 @@ try_onemore: mutex_init(&sbi->gc_mutex); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); + mutex_init(&sbi->resize_mutex); init_rwsem(&sbi->node_write); init_rwsem(&sbi->node_change); @@ -3227,6 +3196,7 @@ try_onemore: } init_rwsem(&sbi->cp_rwsem); + init_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -3246,14 +3216,14 @@ try_onemore: /* get an inode for meta space */ sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi)); if (IS_ERR(sbi->meta_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode"); + f2fs_err(sbi, "Failed to read F2FS meta data inode"); err = PTR_ERR(sbi->meta_inode); goto free_io_dummy; } err = f2fs_get_valid_checkpoint(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to get valid F2FS checkpoint"); + f2fs_err(sbi, "Failed to get valid F2FS checkpoint"); goto free_meta_inode; } @@ -3264,10 +3234,13 @@ try_onemore: sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL; } + if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FSCK_FLAG)) + set_sbi_flag(sbi, SBI_NEED_FSCK); + /* Initialize device list */ err = f2fs_scan_devices(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, "Failed to find devices"); + f2fs_err(sbi, "Failed to find devices"); goto free_devices; } @@ -3287,6 +3260,7 @@ try_onemore: INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } + mutex_init(&sbi->flush_lock); f2fs_init_extent_cache_info(sbi); @@ -3297,14 +3271,14 @@ try_onemore: /* setup f2fs internal modules */ err = f2fs_build_segment_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS segment manager"); + f2fs_err(sbi, "Failed to initialize F2FS segment manager (%d)", + err); goto free_sm; } err = f2fs_build_node_manager(sbi); if (err) { - f2fs_msg(sb, KERN_ERR, - "Failed to initialize F2FS node manager"); + f2fs_err(sbi, "Failed to initialize F2FS node manager (%d)", + err); goto free_nm; } @@ -3329,7 +3303,7 @@ try_onemore: /* get an inode for node space */ sbi->node_inode = f2fs_iget(sb, F2FS_NODE_INO(sbi)); if (IS_ERR(sbi->node_inode)) { - f2fs_msg(sb, KERN_ERR, "Failed to read node inode"); + f2fs_err(sbi, "Failed to read node inode"); err = PTR_ERR(sbi->node_inode); goto free_stats; } @@ -3337,7 +3311,7 @@ try_onemore: /* read root inode and dentry */ root = f2fs_iget(sb, F2FS_ROOT_INO(sbi)); if (IS_ERR(root)) { - f2fs_msg(sb, KERN_ERR, "Failed to read root inode"); + f2fs_err(sbi, "Failed to read root inode"); err = PTR_ERR(root); goto free_node_inode; } @@ -3363,8 +3337,7 @@ try_onemore: if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { err = f2fs_enable_quotas(sb); if (err) - f2fs_msg(sb, KERN_ERR, - "Cannot turn on quotas: error %d", err); + f2fs_err(sbi, "Cannot turn on quotas: error %d", err); } #endif /* if there are nt orphan nodes free them */ @@ -3384,13 +3357,10 @@ try_onemore: if (f2fs_hw_is_readonly(sbi)) { if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data, but " - "write access unavailable"); + f2fs_err(sbi, "Need to recover fsync data, but write access unavailable"); goto free_meta; } - f2fs_msg(sbi->sb, KERN_INFO, "write access " - "unavailable, skipping recovery"); + f2fs_info(sbi, "write access unavailable, skipping recovery"); goto reset_checkpoint; } @@ -3405,8 +3375,8 @@ try_onemore: if (err != -ENOMEM) skip_recovery = true; need_fsck = true; - f2fs_msg(sb, KERN_ERR, - "Cannot recover all fsync data errno=%d", err); + f2fs_err(sbi, "Cannot recover all fsync data errno=%d", + err); goto free_meta; } } else { @@ -3414,8 +3384,7 @@ try_onemore: if (!f2fs_readonly(sb) && err > 0) { err = -EINVAL; - f2fs_msg(sb, KERN_ERR, - "Need to recover fsync data"); + f2fs_err(sbi, "Need to recover fsync data"); goto free_meta; } } @@ -3446,17 +3415,16 @@ reset_checkpoint: /* recover broken superblock */ if (recovery) { err = f2fs_commit_super(sbi, true); - f2fs_msg(sb, KERN_INFO, - "Try to recover %dth superblock, ret: %d", - sbi->valid_super_block ? 1 : 2, err); + f2fs_info(sbi, "Try to recover %dth superblock, ret: %d", + sbi->valid_super_block ? 1 : 2, err); } f2fs_join_shrinker(sbi); f2fs_tuning_parameters(sbi); - f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx", - cur_cp_version(F2FS_CKPT(sbi))); + f2fs_notice(sbi, "Mounted with checkpoint version = %llx", + cur_cp_version(F2FS_CKPT(sbi))); f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index 5c85166677d4..3aeacd0aacfd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -68,6 +68,20 @@ static ssize_t dirty_segments_show(struct f2fs_attr *a, (unsigned long long)(dirty_segments(sbi))); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return snprintf(buf, PAGE_SIZE, "%llu\n", + (unsigned long long)unusable); +} + + static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -440,6 +454,7 @@ F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); +F2FS_GENERAL_RO_ATTR(unusable); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -495,6 +510,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), @@ -568,8 +584,7 @@ static int __maybe_unused segment_info_seq_show(struct seq_file *seq, if ((i % 10) == 0) seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u", se->type, se->valid_blocks); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -595,8 +610,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, struct seg_entry *se = get_seg_entry(sbi, i); seq_printf(seq, "%-10d", i); - seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, false)); + seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e791741d193b..b32c45621679 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -346,7 +346,10 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); if (!*xe) { - err = -EFAULT; + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + err = -EFSCORRUPTED; goto out; } check: @@ -622,7 +625,10 @@ static int __f2fs_setxattr(struct inode *inode, int index, /* find entry with wanted name. */ here = __find_xattr(base_addr, last_base_addr, index, len, name); if (!here) { - error = -EFAULT; + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; goto exit; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 8b0c2bfa90c1..52fa1ef8400b 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -136,27 +136,36 @@ static struct { {FS_JOURNAL_DATA_FL, GFS2_DIF_JDATA | GFS2_DIF_INHERIT_JDATA}, }; +static inline u32 gfs2_gfsflags_to_fsflags(struct inode *inode, u32 gfsflags) +{ + int i; + u32 fsflags = 0; + + if (S_ISDIR(inode->i_mode)) + gfsflags &= ~GFS2_DIF_JDATA; + else + gfsflags &= ~GFS2_DIF_INHERIT_JDATA; + + for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) + if (gfsflags & fsflag_gfs2flag[i].gfsflag) + fsflags |= fsflag_gfs2flag[i].fsflag; + return fsflags; +} + static int gfs2_get_flags(struct file *filp, u32 __user *ptr) { struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; - int i, error; - u32 gfsflags, fsflags = 0; + int error; + u32 fsflags; gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); error = gfs2_glock_nq(&gh); if (error) goto out_uninit; - gfsflags = ip->i_diskflags; - if (S_ISDIR(inode->i_mode)) - gfsflags &= ~GFS2_DIF_JDATA; - else - gfsflags &= ~GFS2_DIF_INHERIT_JDATA; - for (i = 0; i < ARRAY_SIZE(fsflag_gfs2flag); i++) - if (gfsflags & fsflag_gfs2flag[i].gfsflag) - fsflags |= fsflag_gfs2flag[i].fsflag; + fsflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags); if (put_user(fsflags, ptr)) error = -EFAULT; @@ -200,9 +209,11 @@ void gfs2_set_inode_flags(struct inode *inode) * @filp: file pointer * @reqflags: The flags to set * @mask: Indicates which flags are valid + * @fsflags: The FS_* inode flags passed in * */ -static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) +static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask, + const u32 fsflags) { struct inode *inode = file_inode(filp); struct gfs2_inode *ip = GFS2_I(inode); @@ -210,7 +221,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) struct buffer_head *bh; struct gfs2_holder gh; int error; - u32 new_flags, flags; + u32 new_flags, flags, oldflags; error = mnt_want_write_file(filp); if (error) @@ -220,6 +231,11 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask) if (error) goto out_drop_write; + oldflags = gfs2_gfsflags_to_fsflags(inode, ip->i_diskflags); + error = vfs_ioc_setflags_prepare(inode, oldflags, fsflags); + if (error) + goto out; + error = -EACCES; if (!inode_owner_or_capable(inode)) goto out; @@ -308,7 +324,7 @@ static int gfs2_set_flags(struct file *filp, u32 __user *ptr) mask &= ~(GFS2_DIF_TOPDIR | GFS2_DIF_INHERIT_JDATA); } - return do_gfs2_set_flags(filp, gfsflags, mask); + return do_gfs2_set_flags(filp, gfsflags, mask, fsflags); } static int gfs2_getlabel(struct file *filp, char __user *label) diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index 5e6502ef7415..ce15b9496b77 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c @@ -57,9 +57,8 @@ static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags) return 0; } -static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) +static inline unsigned int hfsplus_getflags(struct inode *inode) { - struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags = 0; @@ -69,6 +68,13 @@ static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) flags |= FS_APPEND_FL; if (hip->userflags & HFSPLUS_FLG_NODUMP) flags |= FS_NODUMP_FL; + return flags; +} + +static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) +{ + struct inode *inode = file_inode(file); + unsigned int flags = hfsplus_getflags(inode); return put_user(flags, user_flags); } @@ -78,6 +84,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) struct inode *inode = file_inode(file); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags, new_fl = 0; + unsigned int oldflags = hfsplus_getflags(inode); int err = 0; err = mnt_want_write_file(file); @@ -96,13 +103,9 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) inode_lock(inode); - if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || - inode->i_flags & (S_IMMUTABLE|S_APPEND)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto out_unlock_inode; - } - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto out_unlock_inode; /* don't silently ignore unsupported ext2 flags */ if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { diff --git a/fs/inode.c b/fs/inode.c index 5f5431ec3d62..0f1e3b563c47 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2190,3 +2190,89 @@ struct timespec64 current_time(struct inode *inode) return timespec64_trunc(now, inode->i_sb->s_time_gran); } EXPORT_SYMBOL(current_time); + +/* + * Generic function to check FS_IOC_SETFLAGS values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, + unsigned int flags) +{ + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_setflags_prepare); + +/* + * Generic function to check FS_IOC_FSSETXATTR values and reject any invalid + * configurations. + * + * Note: the caller should be holding i_mutex, or else be sure that they have + * exclusive access to the inode structure. + */ +int vfs_ioc_fssetxattr_check(struct inode *inode, const struct fsxattr *old_fa, + struct fsxattr *fa) +{ + /* + * Can't modify an immutable/append-only file unless we have + * appropriate permission. + */ + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND) && + !capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + + /* + * Project Quota ID state is only allowed to change from within the init + * namespace. Enforce that restriction only if we are trying to change + * the quota ID state. Everything else is allowed in user namespaces. + */ + if (current_user_ns() != &init_user_ns) { + if (old_fa->fsx_projid != fa->fsx_projid) + return -EINVAL; + if ((old_fa->fsx_xflags ^ fa->fsx_xflags) & + FS_XFLAG_PROJINHERIT) + return -EINVAL; + } + + /* Check extent size hints. */ + if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && + !S_ISDIR(inode->i_mode)) + return -EINVAL; + + if ((fa->fsx_xflags & FS_XFLAG_COWEXTSIZE) && + !S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) + return -EINVAL; + + /* + * It is only valid to set the DAX flag on regular files and + * directories on filesystems. + */ + if ((fa->fsx_xflags & FS_XFLAG_DAX) && + !(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) + return -EINVAL; + + /* Extent size hints of zero turn off the flags. */ + if (fa->fsx_extsize == 0) + fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + if (fa->fsx_cowextsize == 0) + fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + + return 0; +} +EXPORT_SYMBOL(vfs_ioc_fssetxattr_check); diff --git a/fs/io_uring.c b/fs/io_uring.c index 4ed4b110a154..3fd884b4e0be 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -231,6 +231,7 @@ struct io_ring_ctx { struct task_struct *sqo_thread; /* if using sq thread polling */ struct mm_struct *sqo_mm; wait_queue_head_t sqo_wait; + struct completion sqo_thread_started; struct { /* CQ ring */ @@ -322,6 +323,7 @@ struct io_kiocb { struct io_ring_ctx *ctx; struct list_head list; + struct list_head link_list; unsigned int flags; refcount_t refs; #define REQ_F_NOWAIT 1 /* must not punt to workers */ @@ -330,8 +332,10 @@ struct io_kiocb { #define REQ_F_SEQ_PREV 8 /* sequential with previous */ #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */ +#define REQ_F_LINK 64 /* linked sqes */ +#define REQ_F_FAIL_LINK 128 /* fail rest of links */ u64 user_data; - u32 error; /* iopoll result from callback */ + u32 result; u32 sequence; struct work_struct work; @@ -403,6 +407,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) ctx->flags = p->flags; init_waitqueue_head(&ctx->cq_wait); init_completion(&ctx->ctx_done); + init_completion(&ctx->sqo_thread_started); mutex_init(&ctx->uring_lock); init_waitqueue_head(&ctx->wait); for (i = 0; i < ARRAY_SIZE(ctx->pending_async); i++) { @@ -584,6 +589,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->flags = 0; /* one is dropped after submission, the other at completion */ refcount_set(&req->refs, 2); + req->result = 0; return req; out: io_ring_drop_ctx_refs(ctx, 1); @@ -599,7 +605,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) } } -static void io_free_req(struct io_kiocb *req) +static void __io_free_req(struct io_kiocb *req) { if (req->file && !(req->flags & REQ_F_FIXED_FILE)) fput(req->file); @@ -607,6 +613,63 @@ static void io_free_req(struct io_kiocb *req) kmem_cache_free(req_cachep, req); } +static void io_req_link_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt; + + /* + * The list should never be empty when we are called here. But could + * potentially happen if the chain is messed up, check to be on the + * safe side. + */ + nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list); + if (nxt) { + list_del(&nxt->list); + if (!list_empty(&req->link_list)) { + INIT_LIST_HEAD(&nxt->link_list); + list_splice(&req->link_list, &nxt->link_list); + nxt->flags |= REQ_F_LINK; + } + + INIT_WORK(&nxt->work, io_sq_wq_submit_work); + queue_work(req->ctx->sqo_wq, &nxt->work); + } +} + +/* + * Called if REQ_F_LINK is set, and we fail the head request + */ +static void io_fail_links(struct io_kiocb *req) +{ + struct io_kiocb *link; + + while (!list_empty(&req->link_list)) { + link = list_first_entry(&req->link_list, struct io_kiocb, list); + list_del(&link->list); + + io_cqring_add_event(req->ctx, link->user_data, -ECANCELED); + __io_free_req(link); + } +} + +static void io_free_req(struct io_kiocb *req) +{ + /* + * If LINK is set, we have dependent requests in this chain. If we + * didn't fail this request, queue the first one up, moving any other + * dependencies to the next request. In case of failure, fail the rest + * of the chain. + */ + if (req->flags & REQ_F_LINK) { + if (req->flags & REQ_F_FAIL_LINK) + io_fail_links(req); + else + io_req_link_next(req); + } + + __io_free_req(req); +} + static void io_put_req(struct io_kiocb *req) { if (refcount_dec_and_test(&req->refs)) @@ -628,16 +691,17 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); - io_cqring_fill_event(ctx, req->user_data, req->error); + io_cqring_fill_event(ctx, req->user_data, req->result); (*nr_events)++; if (refcount_dec_and_test(&req->refs)) { /* If we're not using fixed files, we have to pair the * completion part with the file put. Use regular * completions for those, only batch free for fixed - * file. + * file and non-linked commands. */ - if (req->flags & REQ_F_FIXED_FILE) { + if ((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == + REQ_F_FIXED_FILE) { reqs[to_free++] = req; if (to_free == ARRAY_SIZE(reqs)) io_free_req_many(ctx, reqs, &to_free); @@ -776,6 +840,8 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, req->user_data, res); io_put_req(req); } @@ -786,7 +852,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) kiocb_end_write(kiocb); - req->error = res; + if ((req->flags & REQ_F_LINK) && res != req->result) + req->flags |= REQ_F_FAIL_LINK; + req->result = res; if (res != -EAGAIN) req->flags |= REQ_F_IOPOLL_COMPLETED; } @@ -929,7 +997,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, !kiocb->ki_filp->f_op->iopoll) return -EOPNOTSUPP; - req->error = 0; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; } else { @@ -1001,9 +1068,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, return 0; } -static int io_import_iovec(struct io_ring_ctx *ctx, int rw, - const struct sqe_submit *s, struct iovec **iovec, - struct iov_iter *iter) +static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw, + const struct sqe_submit *s, struct iovec **iovec, + struct iov_iter *iter) { const struct io_uring_sqe *sqe = s->sqe; void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr)); @@ -1021,7 +1088,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - int ret = io_import_fixed(ctx, rw, sqe, iter); + ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -1087,7 +1154,7 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t read_size, ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1100,16 +1167,30 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + read_size = ret; + if (req->flags & REQ_F_LINK) + req->result = read_size; + iov_count = iov_iter_count(&iter); ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); if (!ret) { ssize_t ret2; - /* Catch -EAGAIN return for forced non-blocking submission */ ret2 = call_read_iter(file, kiocb, &iter); + /* + * In case of a short read, punt to async. This can happen + * if we have data partially cached. Alternatively we can + * return the short read, in which case the application will + * need to issue another SQE and wait for it. That SQE will + * need async punt anyway, so it's more efficient to do it + * here. + */ + if (force_nonblock && ret2 > 0 && ret2 < read_size) + ret2 = -EAGAIN; + /* Catch -EAGAIN return for forced non-blocking submission */ if (!force_nonblock || ret2 != -EAGAIN) { io_rw_done(kiocb, ret2); } else { @@ -1134,7 +1215,7 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, struct iov_iter iter; struct file *file; size_t iov_count; - int ret; + ssize_t ret; ret = io_prep_rw(req, s, force_nonblock); if (ret) @@ -1147,9 +1228,12 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); - if (ret) + if (ret < 0) return ret; + if (req->flags & REQ_F_LINK) + req->result = ret; + iov_count = iov_iter_count(&iter); ret = -EAGAIN; @@ -1253,6 +1337,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; @@ -1297,11 +1383,70 @@ static int io_sync_file_range(struct io_kiocb *req, ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); + if (ret < 0 && (req->flags & REQ_F_LINK)) + req->flags |= REQ_F_FAIL_LINK; io_cqring_add_event(req->ctx, sqe->user_data, ret); io_put_req(req); return 0; } +#if defined(CONFIG_NET) +static int io_send_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock, + long (*fn)(struct socket *, struct user_msghdr __user *, + unsigned int)) +{ + struct socket *sock; + int ret; + + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + + sock = sock_from_file(req->file, &ret); + if (sock) { + struct user_msghdr __user *msg; + unsigned flags; + + flags = READ_ONCE(sqe->msg_flags); + if (flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + else if (force_nonblock) + flags |= MSG_DONTWAIT; + + msg = (struct user_msghdr __user *) (unsigned long) + READ_ONCE(sqe->addr); + + ret = fn(sock, msg, flags); + if (force_nonblock && ret == -EAGAIN) + return ret; + } + + io_cqring_add_event(req->ctx, sqe->user_data, ret); + io_put_req(req); + return 0; +} +#endif + +static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_sendmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} + +static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe, + bool force_nonblock) +{ +#if defined(CONFIG_NET) + return io_send_recvmsg(req, sqe, force_nonblock, __sys_recvmsg_sock); +#else + return -EOPNOTSUPP; +#endif +} + static void io_poll_remove_one(struct io_kiocb *req) { struct io_poll_iocb *poll = &req->poll; @@ -1549,9 +1694,10 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret, opcode; + req->user_data = READ_ONCE(s->sqe->user_data); + if (unlikely(s->index >= ctx->sq_entries)) return -EINVAL; - req->user_data = READ_ONCE(s->sqe->user_data); opcode = READ_ONCE(s->sqe->opcode); switch (opcode) { @@ -1586,6 +1732,12 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_SYNC_FILE_RANGE: ret = io_sync_file_range(req, s->sqe, force_nonblock); break; + case IORING_OP_SENDMSG: + ret = io_sendmsg(req, s->sqe, force_nonblock); + break; + case IORING_OP_RECVMSG: + ret = io_recvmsg(req, s->sqe, force_nonblock); + break; default: ret = -EINVAL; break; @@ -1595,7 +1747,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, return ret; if (ctx->flags & IORING_SETUP_IOPOLL) { - if (req->error == -EAGAIN) + if (req->result == -EAGAIN) return -EAGAIN; /* workqueue context doesn't hold uring_lock, grab it now */ @@ -1819,31 +1971,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, return 0; } -static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, - struct io_submit_state *state) +static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, + struct sqe_submit *s) { - struct io_kiocb *req; int ret; - /* enforce forwards compatibility on users */ - if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN))) - return -EINVAL; - - req = io_get_req(ctx, state); - if (unlikely(!req)) - return -EAGAIN; - - ret = io_req_set_file(ctx, s, state, req); - if (unlikely(ret)) - goto out; - - ret = io_req_defer(ctx, req, s->sqe); - if (ret) { - if (ret == -EIOCBQUEUED) - ret = 0; - return ret; - } - ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -1866,24 +1998,93 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, /* * Queued up for async execution, worker will release - * submit reference when the iocb is actually - * submitted. + * submit reference when the iocb is actually submitted. */ return 0; } } -out: /* drop submission reference */ io_put_req(req); /* and drop final reference, if we failed */ - if (ret) + if (ret) { + io_cqring_add_event(ctx, req->user_data, ret); + if (req->flags & REQ_F_LINK) + req->flags |= REQ_F_FAIL_LINK; io_put_req(req); + } return ret; } +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK) + +static void io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, + struct io_submit_state *state, struct io_kiocb **link) +{ + struct io_uring_sqe *sqe_copy; + struct io_kiocb *req; + int ret; + + /* enforce forwards compatibility on users */ + if (unlikely(s->sqe->flags & ~SQE_VALID_FLAGS)) { + ret = -EINVAL; + goto err; + } + + req = io_get_req(ctx, state); + if (unlikely(!req)) { + ret = -EAGAIN; + goto err; + } + + ret = io_req_set_file(ctx, s, state, req); + if (unlikely(ret)) { +err_req: + io_free_req(req); +err: + io_cqring_add_event(ctx, s->sqe->user_data, ret); + return; + } + + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret != -EIOCBQUEUED) + goto err_req; + return; + } + + /* + * If we already have a head request, queue this one for async + * submittal once the head completes. If we don't have a head but + * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be + * submitted sync once the chain is complete. If none of those + * conditions are true (normal request), then just queue it. + */ + if (*link) { + struct io_kiocb *prev = *link; + + sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL); + if (!sqe_copy) { + ret = -EAGAIN; + goto err_req; + } + + s->sqe = sqe_copy; + memcpy(&req->submit, s, sizeof(*s)); + list_add_tail(&req->list, &prev->link_list); + } else if (s->sqe->flags & IOSQE_IO_LINK) { + req->flags |= REQ_F_LINK; + + memcpy(&req->submit, s, sizeof(*s)); + INIT_LIST_HEAD(&req->link_list); + *link = req; + } else { + io_queue_sqe(ctx, req, s); + } +} + /* * Batched submission is done, ensure local IO is flushed out. */ @@ -1966,7 +2167,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, unsigned int nr, bool has_user, bool mm_fault) { struct io_submit_state state, *statep = NULL; - int ret, i, submitted = 0; + struct io_kiocb *link = NULL; + bool prev_was_link = false; + int i, submitted = 0; if (nr > IO_PLUG_THRESHOLD) { io_submit_state_start(&state, ctx, nr); @@ -1974,22 +2177,30 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, struct sqe_submit *sqes, } for (i = 0; i < nr; i++) { + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (sqes[i].sqe->flags & IOSQE_IO_LINK) != 0; + if (unlikely(mm_fault)) { - ret = -EFAULT; + io_cqring_add_event(ctx, sqes[i].sqe->user_data, + -EFAULT); } else { sqes[i].has_user = has_user; sqes[i].needs_lock = true; sqes[i].needs_fixed_file = true; - ret = io_submit_sqe(ctx, &sqes[i], statep); - } - if (!ret) { + io_submit_sqe(ctx, &sqes[i], statep, &link); submitted++; - continue; } - - io_cqring_add_event(ctx, sqes[i].sqe->user_data, ret); } + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(&state); @@ -2006,6 +2217,8 @@ static int io_sq_thread(void *data) unsigned inflight; unsigned long timeout; + complete(&ctx->sqo_thread_started); + old_fs = get_fs(); set_fs(USER_DS); @@ -2130,6 +2343,8 @@ static int io_sq_thread(void *data) static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) { struct io_submit_state state, *statep = NULL; + struct io_kiocb *link = NULL; + bool prev_was_link = false; int i, submit = 0; if (to_submit > IO_PLUG_THRESHOLD) { @@ -2139,22 +2354,30 @@ static int io_ring_submit(struct io_ring_ctx *ctx, unsigned int to_submit) for (i = 0; i < to_submit; i++) { struct sqe_submit s; - int ret; if (!io_get_sqring(ctx, &s)) break; + /* + * If previous wasn't linked and we have a linked command, + * that's the end of the chain. Submit the previous link. + */ + if (!prev_was_link && link) { + io_queue_sqe(ctx, link, &link->submit); + link = NULL; + } + prev_was_link = (s.sqe->flags & IOSQE_IO_LINK) != 0; + s.has_user = true; s.needs_lock = false; s.needs_fixed_file = false; submit++; - - ret = io_submit_sqe(ctx, &s, statep); - if (ret) - io_cqring_add_event(ctx, s.sqe->user_data, ret); + io_submit_sqe(ctx, &s, statep, &link); } io_commit_sqring(ctx); + if (link) + io_queue_sqe(ctx, link, &link->submit); if (statep) io_submit_state_end(statep); @@ -2240,6 +2463,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) static void io_sq_thread_stop(struct io_ring_ctx *ctx) { if (ctx->sqo_thread) { + wait_for_completion(&ctx->sqo_thread_started); /* * The park is a bit of a work-around, without it we get * warning spews on shutdown with SQPOLL set and affinity diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index ba34dae8bd9f..10ee0ecca1a8 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -98,24 +98,16 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) /* Lock against other parallel changes of flags */ inode_lock(inode); - oldflags = jfs_inode->mode2; - - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ - if ((oldflags & JFS_IMMUTABLE_FL) || - ((flags ^ oldflags) & - (JFS_APPEND_FL | JFS_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - inode_unlock(inode); - err = -EPERM; - goto setflags_out; - } + oldflags = jfs_map_ext2(jfs_inode->mode2 & JFS_FL_USER_VISIBLE, + 0); + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) { + inode_unlock(inode); + goto setflags_out; } flags = flags & JFS_FL_USER_MODIFIABLE; - flags |= oldflags & ~JFS_FL_USER_MODIFIABLE; + flags |= jfs_inode->mode2 & ~JFS_FL_USER_MODIFIABLE; jfs_inode->mode2 = flags; jfs_set_inode_flags(inode); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9b96d79eea6c..91b9dac6b2cc 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -148,13 +148,8 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp, oldflags = NILFS_I(inode)->i_flags; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by the - * relevant capability. - */ - ret = -EPERM; - if (((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) && - !capable(CAP_LINUX_IMMUTABLE)) + ret = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (ret) goto out; ret = nilfs_transaction_begin(inode->i_sb, &ti, 0); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 994726ada857..d6f7b299eb23 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -106,16 +106,9 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags, flags = flags & mask; flags |= oldflags & ~mask; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ - status = -EPERM; - if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) & - (OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) { - if (!capable(CAP_LINUX_IMMUTABLE)) - goto bail_unlock; - } + status = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (status) + goto bail_unlock; handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index a35c17017210..679a3c8e4fb3 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -357,11 +357,28 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, return ret; } +static int orangefs_getflags(struct inode *inode, unsigned long *uval) +{ + __u64 val = 0; + int ret; + + ret = orangefs_inode_getxattr(inode, + "user.pvfs2.meta_hint", + &val, sizeof(val)); + if (ret < 0 && ret != -ENODATA) + return ret; + else if (ret == -ENODATA) + val = 0; + *uval = val; + return 0; +} + /* * Perform a miscellaneous operation on a file. */ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file_inode(file); int ret = -ENOTTY; __u64 val = 0; unsigned long uval; @@ -375,20 +392,16 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar * and append flags */ if (cmd == FS_IOC_GETFLAGS) { - val = 0; - ret = orangefs_inode_getxattr(file_inode(file), - "user.pvfs2.meta_hint", - &val, sizeof(val)); - if (ret < 0 && ret != -ENODATA) + ret = orangefs_getflags(inode, &uval); + if (ret) return ret; - else if (ret == -ENODATA) - val = 0; - uval = val; gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n", (unsigned long long)uval); return put_user(uval, (int __user *)arg); } else if (cmd == FS_IOC_SETFLAGS) { + unsigned long old_uval; + ret = 0; if (get_user(uval, (int __user *)arg)) return -EFAULT; @@ -404,11 +417,17 @@ static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long ar gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n"); return -EINVAL; } + ret = orangefs_getflags(inode, &old_uval); + if (ret) + return ret; + ret = vfs_ioc_setflags_prepare(inode, old_uval, uval); + if (ret) + return ret; val = uval; gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n", (unsigned long long)val); - ret = orangefs_inode_setxattr(file_inode(file), + ret = orangefs_inode_setxattr(inode, "user.pvfs2.meta_hint", &val, sizeof(val), 0); } diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index acbbaf7a0bb2..45e1a5d11af3 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -74,13 +74,11 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) err = -EPERM; goto setflags_out; } - if (((flags ^ REISERFS_I(inode)-> - i_attrs) & (REISERFS_IMMUTABLE_FL | - REISERFS_APPEND_FL)) - && !capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; + err = vfs_ioc_setflags_prepare(inode, + REISERFS_I(inode)->i_attrs, + flags); + if (err) goto setflags_out; - } if ((flags & REISERFS_NOTAIL_FL) && S_ISREG(inode->i_mode)) { int result; diff --git a/fs/splice.c b/fs/splice.c index 14cb602d9a2f..98412721f056 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1356,7 +1356,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1367,7 +1367,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, error = import_iovec(type, uiov, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } @@ -1382,7 +1382,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; struct iov_iter iter; - long error; + ssize_t error; struct fd f; int type; @@ -1393,7 +1393,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io error = compat_import_iovec(type, iov32, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter); - if (!error) { + if (error >= 0) { error = do_vmsplice(f.file, &iter, flags); kfree(iov); } diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 4f1a397fda69..034ad14710d1 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -107,18 +107,11 @@ static int setflags(struct inode *inode, int flags) if (err) return err; - /* - * The IMMUTABLE and APPEND_ONLY flags can only be changed by - * the relevant capability. - */ mutex_lock(&ui->ui_mutex); oldflags = ubifs2ioctl(ui->flags); - if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { - if (!capable(CAP_LINUX_IMMUTABLE)) { - err = -EPERM; - goto out_unlock; - } - } + err = vfs_ioc_setflags_prepare(inode, oldflags, flags); + if (err) + goto out_unlock; ui->flags = ioctl2ubifs(flags); ubifs_set_inode_flags(inode); diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 91831975363b..b74a47169297 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -62,6 +62,7 @@ xfs-y += xfs_aops.o \ xfs_attr_inactive.o \ xfs_attr_list.o \ xfs_bmap_util.o \ + xfs_bio_io.o \ xfs_buf.o \ xfs_dir2_readdir.o \ xfs_discard.o \ @@ -80,9 +81,11 @@ xfs-y += xfs_aops.o \ xfs_iops.o \ xfs_inode.o \ xfs_itable.o \ + xfs_iwalk.o \ xfs_message.o \ xfs_mount.o \ xfs_mru_cache.o \ + xfs_pwork.o \ xfs_reflink.o \ xfs_stats.o \ xfs_super.o \ @@ -104,12 +107,8 @@ xfs-y += xfs_log.o \ xfs_rmap_item.o \ xfs_log_recover.o \ xfs_trans_ail.o \ - xfs_trans_bmap.o \ xfs_trans_buf.o \ - xfs_trans_extfree.o \ - xfs_trans_inode.o \ - xfs_trans_refcount.o \ - xfs_trans_rmap.o \ + xfs_trans_inode.o # optional features xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index fdd9d6ede25c..16bb9a328678 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -3,12 +3,7 @@ * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/mm.h> #include <linux/sched/mm.h> -#include <linux/highmem.h> -#include <linux/slab.h> -#include <linux/swap.h> -#include <linux/blkdev.h> #include <linux/backing-dev.h> #include "kmem.h" #include "xfs_message.h" diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 8e6b3ba81c03..267655acd426 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) return kmem_zone_alloc(zone, flags | KM_ZERO); } +static inline struct page * +kmem_to_page(void *addr) +{ + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + return virt_to_page(addr); +} + #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index b0c89f54d1bb..5de296b34ab1 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -10,6 +10,7 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" +#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" @@ -44,6 +45,12 @@ xfs_get_aghdr_buf( return bp; } +static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) +{ + return mp->m_sb.sb_logstart > 0 && + id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); +} + /* * Generic btree root block init function */ @@ -53,40 +60,85 @@ xfs_btroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0); + xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); } -/* - * Alloc btree root block init functions - */ +/* Finish initializing a free space btree. */ static void -xfs_bnoroot_init( +xfs_freesp_init_recs( struct xfs_mount *mp, struct xfs_buf *bp, struct aghdr_init_data *id) { struct xfs_alloc_rec *arec; + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); + + if (is_log_ag(mp, id)) { + struct xfs_alloc_rec *nrec; + xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, + mp->m_sb.sb_logstart); + + ASSERT(start >= mp->m_ag_prealloc_blocks); + if (start != mp->m_ag_prealloc_blocks) { + /* + * Modify first record to pad stripe align of log + */ + arec->ar_blockcount = cpu_to_be32(start - + mp->m_ag_prealloc_blocks); + nrec = arec + 1; + + /* + * Insert second record at start of internal log + * which then gets trimmed. + */ + nrec->ar_startblock = cpu_to_be32( + be32_to_cpu(arec->ar_startblock) + + be32_to_cpu(arec->ar_blockcount)); + arec = nrec; + be16_add_cpu(&block->bb_numrecs, 1); + } + /* + * Change record start to after the internal log + */ + be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks); + } + + /* + * Calculate the record block count and check for the case where + * the log might have consumed all available space in the AG. If + * so, reset the record count to 0 to avoid exposure of an invalid + * record start block. + */ arec->ar_blockcount = cpu_to_be32(id->agsize - be32_to_cpu(arec->ar_startblock)); + if (!arec->ar_blockcount) + block->bb_numrecs = 0; } +/* + * Alloc btree root block init functions + */ static void -xfs_cntroot_init( +xfs_bnoroot_init( struct xfs_mount *mp, struct xfs_buf *bp, struct aghdr_init_data *id) { - struct xfs_alloc_rec *arec; + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); + xfs_freesp_init_recs(mp, bp, id); +} - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); - arec->ar_blockcount = cpu_to_be32(id->agsize - - be32_to_cpu(arec->ar_startblock)); +static void +xfs_cntroot_init( + struct xfs_mount *mp, + struct xfs_buf *bp, + struct aghdr_init_data *id) +{ + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); + xfs_freesp_init_recs(mp, bp, id); } /* @@ -101,7 +153,7 @@ xfs_rmaproot_init( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_rmap_rec *rrec; - xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); /* * mark the AG header regions as static metadata The BNO @@ -149,6 +201,18 @@ xfs_rmaproot_init( rrec->rm_offset = 0; be16_add_cpu(&block->bb_numrecs, 1); } + + /* account for the log space */ + if (is_log_ag(mp, id)) { + rrec = XFS_RMAP_REC_ADDR(block, + be16_to_cpu(block->bb_numrecs) + 1); + rrec->rm_startblock = cpu_to_be32( + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart)); + rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + } } /* @@ -209,6 +273,14 @@ xfs_agfblock_init( agf->agf_refcount_level = cpu_to_be32(1); agf->agf_refcount_blocks = cpu_to_be32(1); } + + if (is_log_ag(mp, id)) { + int64_t logblocks = mp->m_sb.sb_logblocks; + + be32_add_cpu(&agf->agf_freeblks, -logblocks); + agf->agf_longest = cpu_to_be32(id->agsize - + XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks); + } } static void diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index e2ba2a3b63b2..87a9747f1d36 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -9,20 +9,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_alloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" -#include "xfs_bit.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" -#include "xfs_ag_resv.h" -#include "xfs_trans_space.h" #include "xfs_rmap_btree.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index a9ff3cf82cce..372ad55631fc 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -13,7 +13,6 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_alloc_btree.h" @@ -21,7 +20,6 @@ #include "xfs_extent_busy.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" @@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq; STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); /* * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in @@ -555,7 +551,7 @@ static xfs_failaddr_t xfs_agfl_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); int i; @@ -596,7 +592,7 @@ static void xfs_agfl_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; /* @@ -621,7 +617,7 @@ static void xfs_agfl_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -700,6 +696,107 @@ xfs_alloc_update_counters( */ /* + * Deal with the case where only small freespaces remain. Either return the + * contents of the last freespace record, or allocate space from the freelist if + * there is nothing in the tree. + */ +STATIC int /* error */ +xfs_alloc_ag_vextent_small( + struct xfs_alloc_arg *args, /* allocation argument structure */ + struct xfs_btree_cur *ccur, /* optional by-size cursor */ + xfs_agblock_t *fbnop, /* result block number */ + xfs_extlen_t *flenp, /* result length */ + int *stat) /* status: 0-freelist, 1-normal/none */ +{ + int error = 0; + xfs_agblock_t fbno = NULLAGBLOCK; + xfs_extlen_t flen = 0; + int i = 0; + + /* + * If a cntbt cursor is provided, try to allocate the largest record in + * the tree. Try the AGFL if the cntbt is empty, otherwise fail the + * allocation. Make sure to respect minleft even when pulling from the + * freelist. + */ + if (ccur) + error = xfs_btree_decrement(ccur, 0, &i); + if (error) + goto error; + if (i) { + error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); + if (error) + goto error; + XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error); + goto out; + } + + if (args->minlen != 1 || args->alignment != 1 || + args->resv == XFS_AG_RESV_AGFL || + (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= + args->minleft)) + goto out; + + error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); + if (error) + goto error; + if (fbno == NULLAGBLOCK) + goto out; + + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + xfs_alloc_allow_busy_reuse(args->datatype)); + + if (xfs_alloc_is_userdata(args->datatype)) { + struct xfs_buf *bp; + + bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); + if (!bp) { + error = -EFSCORRUPTED; + goto error; + } + xfs_trans_binval(args->tp, bp); + } + *fbnop = args->agbno = fbno; + *flenp = args->len = 1; + XFS_WANT_CORRUPTED_GOTO(args->mp, + fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), + error); + args->wasfromfl = 1; + trace_xfs_alloc_small_freelist(args); + + /* + * If we're feeding an AGFL block to something that doesn't live in the + * free space, we need to clear out the OWN_AG rmap. + */ + error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, + &XFS_RMAP_OINFO_AG); + if (error) + goto error; + + *stat = 0; + return 0; + +out: + /* + * Can't do the allocation, give up. + */ + if (flen < args->minlen) { + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_small_notenough(args); + flen = 0; + } + *fbnop = fbno; + *flenp = flen; + *stat = 1; + trace_xfs_alloc_small_done(args); + return 0; + +error: + trace_xfs_alloc_small_error(args); + return error; +} + +/* * Allocate a variable extent in the allocation group agno. * Type and bno are used to determine where in the allocation group the * extent will start. @@ -1583,112 +1680,6 @@ out_nominleft: } /* - * Deal with the case where only small freespaces remain. - * Either return the contents of the last freespace record, - * or allocate space from the freelist if there is nothing in the tree. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent_small( - xfs_alloc_arg_t *args, /* allocation argument structure */ - xfs_btree_cur_t *ccur, /* by-size cursor */ - xfs_agblock_t *fbnop, /* result block number */ - xfs_extlen_t *flenp, /* result length */ - int *stat) /* status: 0-freelist, 1-normal/none */ -{ - int error; - xfs_agblock_t fbno; - xfs_extlen_t flen; - int i; - - if ((error = xfs_btree_decrement(ccur, 0, &i))) - goto error0; - if (i) { - if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); - } - /* - * Nothing in the btree, try the freelist. Make sure - * to respect minleft even when pulling from the - * freelist. - */ - else if (args->minlen == 1 && args->alignment == 1 && - args->resv != XFS_AG_RESV_AGFL && - (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) - > args->minleft)) { - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); - if (error) - goto error0; - if (fbno != NULLAGBLOCK) { - xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, - xfs_alloc_allow_busy_reuse(args->datatype)); - - if (xfs_alloc_is_userdata(args->datatype)) { - xfs_buf_t *bp; - - bp = xfs_btree_get_bufs(args->mp, args->tp, - args->agno, fbno, 0); - if (!bp) { - error = -EFSCORRUPTED; - goto error0; - } - xfs_trans_binval(args->tp, bp); - } - args->len = 1; - args->agbno = fbno; - XFS_WANT_CORRUPTED_GOTO(args->mp, - args->agbno + args->len <= - be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), - error0); - args->wasfromfl = 1; - trace_xfs_alloc_small_freelist(args); - - /* - * If we're feeding an AGFL block to something that - * doesn't live in the free space, we need to clear - * out the OWN_AG rmap. - */ - error = xfs_rmap_free(args->tp, args->agbp, args->agno, - fbno, 1, &XFS_RMAP_OINFO_AG); - if (error) - goto error0; - - *stat = 0; - return 0; - } - /* - * Nothing in the freelist. - */ - else - flen = 0; - } - /* - * Can't allocate from the freelist for some reason. - */ - else { - fbno = NULLAGBLOCK; - flen = 0; - } - /* - * Can't do the allocation, give up. - */ - if (flen < args->minlen) { - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_small_notenough(args); - flen = 0; - } - *fbnop = fbno; - *flenp = flen; - *stat = 1; - trace_xfs_alloc_small_done(args); - return 0; - -error0: - trace_xfs_alloc_small_error(args); - return error; -} - -/* * Free the extent starting at agno/bno for length. */ STATIC int @@ -2095,7 +2086,7 @@ xfs_free_agfl_block( if (error) return error; - bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); + bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); if (!bp) return -EFSCORRUPTED; xfs_trans_binval(tp, bp); @@ -2586,7 +2577,7 @@ static xfs_failaddr_t xfs_agf_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); if (xfs_sb_version_hascrc(&mp->m_sb)) { @@ -2644,7 +2635,7 @@ static void xfs_agf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -2661,7 +2652,7 @@ static void xfs_agf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -3146,7 +3137,7 @@ xfs_alloc_has_record( /* * Walk all the blocks in the AGFL. The @walk_fn can return any negative - * error code or XFS_BTREE_QUERY_RANGE_ABORT. + * error code or XFS_ITER_*. */ int xfs_agfl_walk( diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 9fe949f6055e..2a94543857a1 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -17,7 +17,6 @@ #include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" @@ -292,7 +291,7 @@ static xfs_failaddr_t xfs_allocbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c441f41f14e8..d48fcf11cc35 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -9,23 +9,18 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr_sf.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_attr_remote.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 3b0dce06e454..ff28ebf3b635 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context { struct xfs_inode *dp; /* inode */ struct attrlist_cursor_kern *cursor; /* position in list */ char *alist; /* output buffer */ - int seen_enough; /* T/F: seen enough of list? */ + + /* + * Abort attribute list iteration if non-zero. Can be used to pass + * error values to the xfs_attr_list caller. + */ + int seen_enough; + ssize_t count; /* num used entries */ int dupcnt; /* count dup hashvals seen */ int bufsize; /* total buffer size */ diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 1f6e3965ff74..70eb941d02e4 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -10,14 +10,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_attr_sf.h" @@ -27,7 +25,6 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_dir2.h" #include "xfs_log.h" @@ -240,7 +237,7 @@ xfs_attr3_leaf_verify( struct xfs_buf *bp) { struct xfs_attr3_icleaf_hdr ichdr; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *entries; uint32_t end; /* must be 32bit - see below */ @@ -313,7 +310,7 @@ static void xfs_attr3_leaf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; @@ -343,7 +340,7 @@ static void xfs_attr3_leaf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -865,7 +862,7 @@ xfs_attr_shortform_allfit( struct xfs_attr3_icleaf_hdr leafhdr; int bytes; int i; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); @@ -1525,7 +1522,7 @@ xfs_attr_leaf_order( { struct xfs_attr3_icleaf_hdr ichdr1; struct xfs_attr3_icleaf_hdr ichdr2; - struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; + struct xfs_mount *mp = leaf1_bp->b_mount; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); @@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash( { struct xfs_attr3_icleaf_hdr ichdr; struct xfs_attr_leaf_entry *entries; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); entries = xfs_attr3_leaf_entryp(bp->b_addr); diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 65ff600a8067..4eb30d357045 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -16,18 +16,10 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h" -#include "xfs_trans_space.h" #include "xfs_trace.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" #include "xfs_error.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ @@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify( bool check_crc, xfs_failaddr_t *failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; char *ptr; int len; xfs_daddr_t bno; @@ -175,7 +167,7 @@ static void xfs_attr3_rmt_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; int blksize = mp->m_attr_geo->blksize; char *ptr; @@ -535,7 +527,7 @@ xfs_attr_rmtval_set( dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); + bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt); if (!bp) return -ENOMEM; bp->b_ops = &xfs_attr3_rmt_buf_ops; diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c index 40ce5f3094d1..7071ff98fdbc 100644 --- a/fs/xfs/libxfs/xfs_bit.c +++ b/fs/xfs/libxfs/xfs_bit.c @@ -5,7 +5,6 @@ */ #include "xfs.h" #include "xfs_log_format.h" -#include "xfs_bit.h" /* * XFS bit manipulation routines, used in non-realtime code. diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 356ebd1cbe82..baf0b72c0a37 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -13,14 +13,10 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -32,7 +28,6 @@ #include "xfs_trans_space.h" #include "xfs_buf_item.h" #include "xfs_trace.h" -#include "xfs_symlink.h" #include "xfs_attr_leaf.h" #include "xfs_filestream.h" #include "xfs_rmap.h" @@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, NULL, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents( XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, xfs_btree_check_lptr(cur, cbno, 1)); #endif - error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, + error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; @@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree( cur->bc_private.b.allocated++; ip->i_d.di_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); - abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); + abp = xfs_btree_get_bufl(mp, tp, args.fsbno); if (!abp) { error = -EFSCORRUPTED; goto out_unreserve_dquot; @@ -878,7 +873,7 @@ xfs_bmap_local_to_extents( ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); tp->t_firstblock = args.fsbno; - bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); + bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno); /* * Initialize the block, copy the data and log the remote buffer. @@ -1203,7 +1198,7 @@ xfs_iread_extents( * pointer (leftmost) at each level. */ while (level-- > 0) { - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) goto out; @@ -1276,7 +1271,7 @@ xfs_iread_extents( */ if (bno == NULLFSBLOCK) break; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index aff82ed112c9..fbb18ba5d905 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -11,10 +11,8 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" @@ -22,7 +20,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_rmap.h" /* @@ -411,7 +408,7 @@ static xfs_failaddr_t xfs_bmbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_failaddr_t fa; unsigned int level; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index bbdae2b4559f..f1048efa4268 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -11,16 +11,13 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_buf_item.h" #include "xfs_btree.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_alloc.h" #include "xfs_log.h" @@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_buf_log_item *bip = bp->b_log_item; - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) return; if (bip) block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); @@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) @@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_buf_log_item *bip = bp->b_log_item; - if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) return; if (bip) block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); @@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (xfs_sb_version_hascrc(&mp->m_sb)) { if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) @@ -691,14 +688,13 @@ xfs_buf_t * /* buffer for fsbno */ xfs_btree_get_bufl( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock) /* lock flags for get_buf */ + xfs_fsblock_t fsbno) /* file system block number */ { xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); } /* @@ -710,15 +706,14 @@ xfs_btree_get_bufs( xfs_mount_t *mp, /* file system mount point */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock) /* lock flags for get_buf */ + xfs_agblock_t agbno) /* allocation group block number */ { xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); } /* @@ -845,7 +840,6 @@ xfs_btree_read_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ struct xfs_buf **bpp, /* buffer for fsbno */ int refval, /* ref count value for buffer */ const struct xfs_buf_ops *ops) @@ -858,7 +852,7 @@ xfs_btree_read_bufl( return -EFSCORRUPTED; d = XFS_FSB_TO_DADDR(mp, fsbno); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp, ops); + mp->m_bsize, 0, &bp, ops); if (error) return error; if (bp) @@ -1185,11 +1179,10 @@ xfs_btree_init_block( xfs_btnum_t btnum, __u16 level, __u16 numrecs, - __u64 owner, - unsigned int flags) + __u64 owner) { xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, - btnum, level, numrecs, owner, flags); + btnum, level, numrecs, owner, 0); } STATIC void @@ -1288,7 +1281,6 @@ STATIC int xfs_btree_get_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, - int flags, struct xfs_btree_block **block, struct xfs_buf **bpp) { @@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block( xfs_daddr_t d; int error; - /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XBF_TRYLOCK)); - error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags); + mp->m_bsize, 0); if (!*bpp) return -ENOMEM; @@ -2706,7 +2695,7 @@ __xfs_btree_split( XFS_BTREE_STATS_INC(cur, alloc); /* Set up the new block as "right". */ - error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); + error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); if (error) goto error0; @@ -2961,7 +2950,7 @@ xfs_btree_new_iroot( XFS_BTREE_STATS_INC(cur, alloc); /* Copy the root into a real block. */ - error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); + error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); if (error) goto error0; @@ -3058,7 +3047,7 @@ xfs_btree_new_root( XFS_BTREE_STATS_INC(cur, alloc); /* Set up the new block. */ - error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); + error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); if (error) goto error0; @@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify( struct xfs_buf *bp, uint64_t owner) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify( struct xfs_buf *bp, unsigned int max_recs) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); /* numrecs verification */ @@ -4484,7 +4473,7 @@ xfs_failaddr_t xfs_btree_sblock_v5hdr_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; @@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify( struct xfs_buf *bp, unsigned int max_recs) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_agblock_t agno; diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index e3b3e9dce5da..fa3cd8ab9aba 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -301,8 +301,7 @@ struct xfs_buf * /* buffer for fsbno */ xfs_btree_get_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock); /* lock flags for get_buf */ + xfs_fsblock_t fsbno); /* file system block number */ /* * Get a buffer for the block, return it with no data read. @@ -313,8 +312,7 @@ xfs_btree_get_bufs( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - uint lock); /* lock flags for get_buf */ + xfs_agblock_t agbno); /* allocation group block number */ /* * Check for the cursor referring to the last block at the given level. @@ -345,7 +343,6 @@ xfs_btree_read_bufl( struct xfs_mount *mp, /* file system mount point */ struct xfs_trans *tp, /* transaction pointer */ xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ struct xfs_buf **bpp, /* buffer for fsbno */ int refval, /* ref count value for buffer */ const struct xfs_buf_ops *ops); @@ -383,8 +380,7 @@ xfs_btree_init_block( xfs_btnum_t btnum, __u16 level, __u16 numrecs, - __u64 owner, - unsigned int flags); + __u64 owner); void xfs_btree_init_block_int( @@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len); unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); /* return codes */ -#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ -#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ +#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */ +#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */ typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, union xfs_btree_rec *rec, void *priv); diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index e2737e2ac2ae..d1c77fd0815d 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -12,20 +12,14 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" #include "xfs_bmap.h" -#include "xfs_attr.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_buf_item.h" #include "xfs_log.h" @@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify( struct xfs_buf *bp, struct xfs_da3_blkinfo *hdr3) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_da_blkinfo *hdr = &hdr3->hdr; if (!xfs_verify_magic16(bp, hdr->magic)) @@ -148,7 +142,7 @@ static xfs_failaddr_t xfs_da3_node_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_da_intnode *hdr = bp->b_addr; struct xfs_da3_icnode_hdr ichdr; const struct xfs_dir_ops *ops; @@ -186,7 +180,7 @@ static void xfs_da3_node_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_da3_node_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c index b39053dcb643..b1ae572496b6 100644 --- a/fs/xfs/libxfs/xfs_da_format.c +++ b/fs/xfs/libxfs/xfs_da_format.c @@ -11,11 +11,8 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" -#include "xfs_dir2_priv.h" /* * Shortform directory ops diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 1c6bf2105939..eb2be2a6a25a 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -9,8 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 156ce95c9c45..67840723edbb 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -5,20 +5,16 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index b7d6d78f4ce2..a6fb0cc2085e 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -6,22 +6,19 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_buf_item.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_log.h" /* @@ -50,7 +47,7 @@ static xfs_failaddr_t xfs_dir3_block_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) @@ -71,7 +68,7 @@ static void xfs_dir3_block_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -88,7 +85,7 @@ static void xfs_dir3_block_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index b7b9ce002cb9..2c79be4c3153 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -6,19 +6,16 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" -#include "xfs_dir2_priv.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_log.h" static xfs_failaddr_t xfs_dir2_data_freefind_verify( @@ -50,14 +47,13 @@ __xfs_dir3_data_check( int i; /* leaf index */ int lastfree; /* last entry was unused */ xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ - xfs_mount_t *mp; /* filesystem mount point */ + struct xfs_mount *mp = bp->b_mount; char *p; /* current data position */ int stale; /* count of stale leaves */ struct xfs_name name; const struct xfs_dir_ops *ops; struct xfs_da_geometry *geo; - mp = bp->b_target->bt_mount; geo = mp->m_dir_geo; /* @@ -249,7 +245,7 @@ static xfs_failaddr_t xfs_dir3_data_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; if (!xfs_verify_magic(bp, hdr3->magic)) @@ -298,7 +294,7 @@ static void xfs_dir3_data_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -315,7 +311,7 @@ static void xfs_dir3_data_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index 9c2a0a13ed61..a53e4585a2f3 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -6,12 +6,11 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_dir2.h" @@ -20,8 +19,6 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" -#include "xfs_log.h" /* * Local function declarations. @@ -144,7 +141,7 @@ static xfs_failaddr_t xfs_dir3_leaf_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_leaf *leaf = bp->b_addr; xfs_failaddr_t fa; @@ -159,7 +156,7 @@ static void xfs_dir3_leaf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -176,7 +173,7 @@ static void xfs_dir3_leaf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 16731d2d684b..afcc6642690a 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -6,12 +6,11 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "xfs_dir2.h" @@ -20,7 +19,6 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_log.h" /* @@ -84,7 +82,7 @@ static xfs_failaddr_t xfs_dir3_free_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dir2_free_hdr *hdr = bp->b_addr; if (!xfs_verify_magic(bp, hdr->magic)) @@ -110,7 +108,7 @@ static void xfs_dir3_free_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -127,7 +125,7 @@ static void xfs_dir3_free_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 585dfdb7b6b6..033589257f54 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -5,16 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_trace.h" diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 88fa11071f9f..e8bd688a4073 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -16,8 +16,6 @@ #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_error.h" -#include "xfs_cksum.h" -#include "xfs_trace.h" int xfs_calc_dquots_per_chunk( @@ -224,7 +222,7 @@ static xfs_failaddr_t xfs_dquot_buf_verify_struct( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; return xfs_dquot_buf_verify(mp, bp, false); } @@ -233,7 +231,7 @@ static void xfs_dquot_buf_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (!xfs_dquot_buf_verify_crc(mp, bp, false)) return; @@ -250,7 +248,7 @@ static void xfs_dquot_buf_readahead_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; if (!xfs_dquot_buf_verify_crc(mp, bp, true) || xfs_dquot_buf_verify(mp, bp, true) != NULL) { @@ -268,7 +266,7 @@ static void xfs_dquot_buf_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_dquot_buf_verify(mp, bp, false); } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9bb3c48843ec..c968b60cee15 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) #define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) #define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog #define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog -#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log +#define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log) #define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log #define XFS_INO_BITS(mp) \ XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp) diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index e7382c780ed7..52d03a3a02a4 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -97,7 +97,7 @@ struct getbmapx { * For use by backup and restore programs to set the XFS on-disk inode * fields di_dmevmask and di_dmstate. These must be set to exactly and * only values previously obtained via xfs_bulkstat! (Specifically the - * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) + * struct xfs_bstat fields bs_dmevmask and bs_dmstate.) */ #ifndef HAVE_FSDMIDATA struct fsdmidata { @@ -328,7 +328,7 @@ typedef struct xfs_bstime { __s32 tv_nsec; /* and nanoseconds */ } xfs_bstime_t; -typedef struct xfs_bstat { +struct xfs_bstat { __u64 bs_ino; /* inode number */ __u16 bs_mode; /* type and mode */ __u16 bs_nlink; /* number of links */ @@ -356,7 +356,53 @@ typedef struct xfs_bstat { __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ -} xfs_bstat_t; +}; + +/* New bulkstat structure that reports v5 features and fixes padding issues */ +struct xfs_bulkstat { + uint64_t bs_ino; /* inode number */ + uint64_t bs_size; /* file size */ + + uint64_t bs_blocks; /* number of blocks */ + uint64_t bs_xflags; /* extended flags */ + + uint64_t bs_atime; /* access time, seconds */ + uint64_t bs_mtime; /* modify time, seconds */ + + uint64_t bs_ctime; /* inode change time, seconds */ + uint64_t bs_btime; /* creation time, seconds */ + + uint32_t bs_gen; /* generation count */ + uint32_t bs_uid; /* user id */ + uint32_t bs_gid; /* group id */ + uint32_t bs_projectid; /* project id */ + + uint32_t bs_atime_nsec; /* access time, nanoseconds */ + uint32_t bs_mtime_nsec; /* modify time, nanoseconds */ + uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */ + uint32_t bs_btime_nsec; /* creation time, nanoseconds */ + + uint32_t bs_blksize; /* block size */ + uint32_t bs_rdev; /* device value */ + uint32_t bs_cowextsize_blks; /* cow extent size hint, blocks */ + uint32_t bs_extsize_blks; /* extent size hint, blocks */ + + uint32_t bs_nlink; /* number of links */ + uint32_t bs_extents; /* number of extents */ + uint32_t bs_aextents; /* attribute number of extents */ + uint16_t bs_version; /* structure version */ + uint16_t bs_forkoff; /* inode fork offset in bytes */ + + uint16_t bs_sick; /* sick inode metadata */ + uint16_t bs_checked; /* checked inode metadata */ + uint16_t bs_mode; /* type and mode */ + uint16_t bs_pad2; /* zeroed */ + + uint64_t bs_pad[7]; /* zeroed */ +}; + +#define XFS_BULKSTAT_VERSION_V1 (1) +#define XFS_BULKSTAT_VERSION_V5 (5) /* bs_sick flags */ #define XFS_BS_SICK_INODE (1 << 0) /* inode core */ @@ -374,7 +420,7 @@ typedef struct xfs_bstat { * to retain compatibility with "old" filesystems). */ static inline uint32_t -bstat_get_projid(struct xfs_bstat *bs) +bstat_get_projid(const struct xfs_bstat *bs) { return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; } @@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs) /* * The user-level BulkStat Request interface structure. */ -typedef struct xfs_fsop_bulkreq { +struct xfs_fsop_bulkreq { __u64 __user *lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ void __user *ubuffer;/* user buffer for inode desc. */ __s32 __user *ocount; /* output count pointer */ -} xfs_fsop_bulkreq_t; - +}; /* * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). */ -typedef struct xfs_inogrp { +struct xfs_inogrp { __u64 xi_startino; /* starting inode number */ __s32 xi_alloccount; /* # bits set in allocmask */ __u64 xi_allocmask; /* mask of allocated inodes */ -} xfs_inogrp_t; +}; +/* New inumbers structure that reports v5 features and fixes padding issues */ +struct xfs_inumbers { + uint64_t xi_startino; /* starting inode number */ + uint64_t xi_allocmask; /* mask of allocated inodes */ + uint8_t xi_alloccount; /* # bits set in allocmask */ + uint8_t xi_version; /* version */ + uint8_t xi_padding[6]; /* zero */ +}; + +#define XFS_INUMBERS_VERSION_V1 (1) +#define XFS_INUMBERS_VERSION_V5 (5) + +/* Header for bulk inode requests. */ +struct xfs_bulk_ireq { + uint64_t ino; /* I/O: start with this inode */ + uint32_t flags; /* I/O: operation flags */ + uint32_t icount; /* I: count of entries in buffer */ + uint32_t ocount; /* O: count of entries filled out */ + uint32_t agno; /* I: see comment for IREQ_AGNO */ + uint64_t reserved[5]; /* must be zero */ +}; + +/* + * Only return results from the specified @agno. If @ino is zero, start + * with the first inode of @agno. + */ +#define XFS_BULK_IREQ_AGNO (1 << 0) + +/* + * Return bulkstat information for a single inode, where @ino value is a + * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_* + * values below. Not compatible with XFS_BULK_IREQ_AGNO. + */ +#define XFS_BULK_IREQ_SPECIAL (1 << 1) + +#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ + XFS_BULK_IREQ_SPECIAL) + +/* Operate on the root directory inode. */ +#define XFS_BULK_IREQ_SPECIAL_ROOT (1) + +/* + * ioctl structures for v5 bulkstat and inumbers requests + */ +struct xfs_bulkstat_req { + struct xfs_bulk_ireq hdr; + struct xfs_bulkstat bulkstat[]; +}; +#define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \ + (nr) * sizeof(struct xfs_bulkstat)) + +struct xfs_inumbers_req { + struct xfs_bulk_ireq hdr; + struct xfs_inumbers inumbers[]; +}; +#define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \ + (nr) * sizeof(struct xfs_inumbers)) /* * Error injection. @@ -529,7 +631,7 @@ typedef struct xfs_swapext xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ - xfs_bstat_t sx_stat; /* stat of target b4 copy */ + struct xfs_bstat sx_stat; /* stat of target b4 copy */ } xfs_swapext_t; /* @@ -701,6 +803,8 @@ struct xfs_scrub_metadata { #define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) #define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) #define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) +#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req) +#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req) /* XFS_IOC_GETFSUUID ---------- deprecated 140 */ diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 49ddfeac19f2..272005ac8c88 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip) void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); -void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs); +void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); #endif /* __XFS_HEALTH_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index fe9898875097..04377ab75863 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -12,17 +12,14 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" -#include "xfs_rtalloc.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_icreate_item.h" @@ -31,20 +28,6 @@ #include "xfs_log.h" #include "xfs_rmap.h" - -/* - * Allocation group level functions. - */ -int -xfs_ialloc_cluster_alignment( - struct xfs_mount *mp) -{ - if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) - return mp->m_sb.sb_inoalignmt; - return 1; -} - /* * Lookup a record by ino in the btree given by cur. */ @@ -299,7 +282,7 @@ xfs_ialloc_inode_init( * sizes, manipulate the inodes in buffers which are multiples of the * blocks size. */ - nbufs = length / mp->m_blocks_per_cluster; + nbufs = length / M_IGEO(mp)->blocks_per_cluster; /* * Figure out what version number to use in the inodes we create. If @@ -343,9 +326,10 @@ xfs_ialloc_inode_init( * Get the block. */ d = XFS_AGB_TO_DADDR(mp, agno, agbno + - (j * mp->m_blocks_per_cluster)); + (j * M_IGEO(mp)->blocks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, - mp->m_bsize * mp->m_blocks_per_cluster, + mp->m_bsize * + M_IGEO(mp)->blocks_per_cluster, XBF_UNMAPPED); if (!fbuf) return -ENOMEM; @@ -353,7 +337,7 @@ xfs_ialloc_inode_init( /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); - for (i = 0; i < mp->m_inodes_per_cluster; i++) { + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = xfs_dinode_size(version); @@ -616,24 +600,26 @@ error: * Allocate new inodes in the allocation group specified by agbp. * Return 0 for success, else error code. */ -STATIC int /* error code or 0 */ +STATIC int xfs_ialloc_ag_alloc( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* alloc group buffer */ - int *alloc) + struct xfs_trans *tp, + struct xfs_buf *agbp, + int *alloc) { - xfs_agi_t *agi; /* allocation group header */ - xfs_alloc_arg_t args; /* allocation argument structure */ - xfs_agnumber_t agno; - int error; - xfs_agino_t newino; /* new first inode's number */ - xfs_agino_t newlen; /* new number of inodes */ - int isaligned = 0; /* inode allocation at stripe unit */ - /* boundary */ - uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ + struct xfs_agi *agi; + struct xfs_alloc_arg args; + xfs_agnumber_t agno; + int error; + xfs_agino_t newino; /* new first inode's number */ + xfs_agino_t newlen; /* new number of inodes */ + int isaligned = 0; /* inode allocation at stripe */ + /* unit boundary */ + /* init. to full chunk */ + uint16_t allocmask = (uint16_t) -1; struct xfs_inobt_rec_incore rec; - struct xfs_perag *pag; - int do_sparse = 0; + struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp); + int do_sparse = 0; memset(&args, 0, sizeof(args)); args.tp = tp; @@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc( #ifdef DEBUG /* randomly do sparse inode allocations */ if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) + igeo->ialloc_min_blks < igeo->ialloc_blks) do_sparse = prandom_u32() & 1; #endif @@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc( * Locking will ensure that we don't have two callers in here * at one time. */ - newlen = args.mp->m_ialloc_inos; - if (args.mp->m_maxicount && + newlen = igeo->ialloc_inos; + if (igeo->maxicount && percpu_counter_read_positive(&args.mp->m_icount) + newlen > - args.mp->m_maxicount) + igeo->maxicount) return -ENOSPC; - args.minlen = args.maxlen = args.mp->m_ialloc_blks; + args.minlen = args.maxlen = igeo->ialloc_blks; /* * First try to allocate inodes contiguous with the last-allocated * chunk of inodes. If the filesystem is striped, this will fill @@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc( newino = be32_to_cpu(agi->agi_newino); agno = be32_to_cpu(agi->agi_seqno); args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + - args.mp->m_ialloc_blks; + igeo->ialloc_blks; if (do_sparse) goto sparse_alloc; if (likely(newino != NULLAGINO && @@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc( * but not to use them in the actual exact allocation. */ args.alignment = 1; - args.minalignslop = args.mp->m_cluster_align - 1; + args.minalignslop = igeo->cluster_align - 1; /* Allow space for the inode btree to split. */ - args.minleft = args.mp->m_in_maxlevels - 1; + args.minleft = igeo->inobt_maxlevels - 1; if ((error = xfs_alloc_vextent(&args))) return error; @@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc( * pieces, so don't need alignment anyway. */ isaligned = 0; - if (args.mp->m_sinoalign) { + if (igeo->ialloc_align) { ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); args.alignment = args.mp->m_dalign; isaligned = 1; } else - args.alignment = args.mp->m_cluster_align; + args.alignment = igeo->cluster_align; /* * Need to figure out where to allocate the inode blocks. * Ideally they should be spaced out through the a.g. @@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc( /* * Allow space for the inode btree to split. */ - args.minleft = args.mp->m_in_maxlevels - 1; + args.minleft = igeo->inobt_maxlevels - 1; if ((error = xfs_alloc_vextent(&args))) return error; } @@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc( args.type = XFS_ALLOCTYPE_NEAR_BNO; args.agbno = be32_to_cpu(agi->agi_root); args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); - args.alignment = args.mp->m_cluster_align; + args.alignment = igeo->cluster_align; if ((error = xfs_alloc_vextent(&args))) return error; } @@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc( * the sparse allocation length is smaller than a full chunk. */ if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && - args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && + igeo->ialloc_min_blks < igeo->ialloc_blks && args.fsbno == NULLFSBLOCK) { sparse_alloc: args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -773,7 +759,7 @@ sparse_alloc: args.alignment = args.mp->m_sb.sb_spino_align; args.prod = 1; - args.minlen = args.mp->m_ialloc_min_blks; + args.minlen = igeo->ialloc_min_blks; args.maxlen = args.minlen; /* @@ -789,7 +775,7 @@ sparse_alloc: args.min_agbno = args.mp->m_sb.sb_inoalignmt; args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, args.mp->m_sb.sb_inoalignmt) - - args.mp->m_ialloc_blks; + igeo->ialloc_blks; error = xfs_alloc_vextent(&args); if (error) @@ -1006,7 +992,7 @@ xfs_ialloc_ag_select( * space needed for alignment of inode chunks when checking the * longest contiguous free space in the AG - this prevents us * from getting ENOSPC because we have free space larger than - * m_ialloc_blks but alignment constraints prevent us from using + * ialloc_blks but alignment constraints prevent us from using * it. * * If we can't find an AG with space for full alignment slack to @@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select( * if we fail allocation due to alignment issues then it is most * likely a real ENOSPC condition. */ - ineed = mp->m_ialloc_min_blks; + ineed = M_IGEO(mp)->ialloc_min_blks; if (flags && ineed > 1) - ineed += mp->m_cluster_align; + ineed += M_IGEO(mp)->cluster_align; longest = pag->pagf_longest; if (!longest) longest = pag->pagf_flcount > 0; @@ -1703,6 +1689,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); int okalloc = 1; if (*IO_agbp) { @@ -1733,9 +1720,9 @@ xfs_dialloc( * Read rough value of mp->m_icount by percpu_counter_read_positive, * which will sacrifice the preciseness but improve the performance. */ - if (mp->m_maxicount && - percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos - > mp->m_maxicount) { + if (igeo->maxicount && + percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos + > igeo->maxicount) { noroom = 1; okalloc = 0; } @@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES); + M_IGEO(mp)->ialloc_blks, + &XFS_RMAP_OINFO_INODES); return; } @@ -2261,7 +2249,7 @@ xfs_imap_lookup( /* check that the returned record contains the required inode */ if (rec.ir_startino > agino || - rec.ir_startino + mp->m_ialloc_inos <= agino) + rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino) return -EINVAL; /* for untrusted inodes check it is allocated first */ @@ -2352,7 +2340,7 @@ xfs_imap( * If the inode cluster size is the same as the blocksize or * smaller we get to the buffer by simple arithmetics. */ - if (mp->m_blocks_per_cluster == 1) { + if (M_IGEO(mp)->blocks_per_cluster == 1) { offset = XFS_INO_TO_OFFSET(mp, ino); ASSERT(offset < mp->m_sb.sb_inopblock); @@ -2368,8 +2356,8 @@ xfs_imap( * find the location. Otherwise we have to do a btree * lookup to find the location. */ - if (mp->m_inoalign_mask) { - offset_agbno = agbno & mp->m_inoalign_mask; + if (M_IGEO(mp)->inoalign_mask) { + offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { error = xfs_imap_lookup(mp, tp, agno, agino, agbno, @@ -2381,13 +2369,13 @@ xfs_imap( out_map: ASSERT(agbno >= chunk_agbno); cluster_agbno = chunk_agbno + - ((offset_agbno / mp->m_blocks_per_cluster) * - mp->m_blocks_per_cluster); + ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) * + M_IGEO(mp)->blocks_per_cluster); offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + XFS_INO_TO_OFFSET(mp, ino); imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); - imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); + imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); /* @@ -2409,20 +2397,6 @@ out_map: } /* - * Compute and fill in value of m_in_maxlevels. - */ -void -xfs_ialloc_compute_maxlevels( - xfs_mount_t *mp) /* file system mount structure */ -{ - uint inodes; - - inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; - mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr, - inodes); -} - -/* * Log specified fields for the ag hdr (inode section). The growth of the agi * structure over time requires that we interpret the buffer as two logical * regions delineated by the end of the unlinked list. This is due to the size @@ -2493,7 +2467,7 @@ static xfs_failaddr_t xfs_agi_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); int i; @@ -2545,7 +2519,7 @@ static void xfs_agi_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; if (xfs_sb_version_hascrc(&mp->m_sb) && @@ -2562,7 +2536,7 @@ static void xfs_agi_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; @@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes( *freecount = ci.freecount; return 0; } + +/* + * Initialize inode-related geometry information. + * + * Compute the inode btree min and max levels and set maxicount. + * + * Set the inode cluster size. This may still be overridden by the file + * system block size if it is larger than the chosen cluster size. + * + * For v5 filesystems, scale the cluster size with the inode size to keep a + * constant ratio of inode per cluster buffer, but only if mkfs has set the + * inode alignment value appropriately for larger cluster sizes. + * + * Then compute the inode cluster alignment information. + */ +void +xfs_ialloc_setup_geometry( + struct xfs_mount *mp) +{ + struct xfs_sb *sbp = &mp->m_sb; + struct xfs_ino_geometry *igeo = M_IGEO(mp); + uint64_t icount; + uint inodes; + + /* Compute inode btree geometry. */ + igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog; + igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); + igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); + igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2; + igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2; + + igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, + sbp->sb_inopblock); + igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog; + + if (sbp->sb_spino_align) + igeo->ialloc_min_blks = sbp->sb_spino_align; + else + igeo->ialloc_min_blks = igeo->ialloc_blks; + + /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */ + inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG; + igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr, + inodes); + + /* Set the maximum inode count for this filesystem. */ + if (sbp->sb_imax_pct) { + /* + * Make sure the maximum inode count is a multiple + * of the units we allocate inodes in. + */ + icount = sbp->sb_dblocks * sbp->sb_imax_pct; + do_div(icount, 100); + do_div(icount, igeo->ialloc_blks); + igeo->maxicount = XFS_FSB_TO_INO(mp, + icount * igeo->ialloc_blks); + } else { + igeo->maxicount = 0; + } + + /* + * Compute the desired size of an inode cluster buffer size, which + * starts at 8K and (on v5 filesystems) scales up with larger inode + * sizes. + * + * Preserve the desired inode cluster size because the sparse inodes + * feature uses that desired size (not the actual size) to compute the + * sparse inode alignment. The mount code validates this value, so we + * cannot change the behavior. + */ + igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE; + if (xfs_sb_version_hascrc(&mp->m_sb)) { + int new_size = igeo->inode_cluster_size_raw; + + new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; + if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) + igeo->inode_cluster_size_raw = new_size; + } + + /* Calculate inode cluster ratios. */ + if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize) + igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp, + igeo->inode_cluster_size_raw); + else + igeo->blocks_per_cluster = 1; + igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster); + igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster); + + /* Calculate inode cluster alignment. */ + if (xfs_sb_version_hasalign(&mp->m_sb) && + mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster) + igeo->cluster_align = mp->m_sb.sb_inoalignmt; + else + igeo->cluster_align = 1; + igeo->inoalign_mask = igeo->cluster_align - 1; + igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align); + + /* + * If we are using stripe alignment, check whether + * the stripe unit is a multiple of the inode alignment + */ + if (mp->m_dalign && igeo->inoalign_mask && + !(mp->m_dalign & igeo->inoalign_mask)) + igeo->ialloc_align = mp->m_dalign; + else + igeo->ialloc_align = 0; +} diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index e936b7cc9389..323592d563d5 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -23,16 +23,6 @@ struct xfs_icluster { * sparse chunks */ }; -/* Calculate and return the number of filesystem blocks per inode cluster */ -static inline int -xfs_icluster_size_fsb( - struct xfs_mount *mp) -{ - if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) - return 1; - return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; -} - /* * Make an inode pointer out of the buffer/offset. */ @@ -96,13 +86,6 @@ xfs_imap( uint flags); /* flags for inode btree lookup */ /* - * Compute and fill in value of m_in_maxlevels. - */ -void -xfs_ialloc_compute_maxlevels( - struct xfs_mount *mp); /* file system mount structure */ - -/* * Log specified fields for the ag hdr (inode section) */ void @@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask, int *stat); int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); +void xfs_ialloc_setup_geometry(struct xfs_mount *mp); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index bc2dfacd2f4a..b82992f795aa 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -11,14 +11,12 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_rmap.h" @@ -28,7 +26,7 @@ xfs_inobt_get_minrecs( struct xfs_btree_cur *cur, int level) { - return cur->bc_mp->m_inobt_mnr[level != 0]; + return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0]; } STATIC struct xfs_btree_cur * @@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs( struct xfs_btree_cur *cur, int level) { - return cur->bc_mp->m_inobt_mxr[level != 0]; + return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0]; } STATIC void @@ -255,7 +253,7 @@ static xfs_failaddr_t xfs_inobt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); xfs_failaddr_t fa; unsigned int level; @@ -281,10 +279,11 @@ xfs_inobt_verify( /* level verification */ level = be16_to_cpu(block->bb_level); - if (level >= mp->m_in_maxlevels) + if (level >= M_IGEO(mp)->inobt_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); + return xfs_btree_sblock_verify(bp, + M_IGEO(mp)->inobt_mxr[level != 0]); } static void @@ -546,7 +545,7 @@ xfs_inobt_max_size( xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); /* Bail out if we're uninitialized, which can happen in mkfs. */ - if (mp->m_inobt_mxr[0] == 0) + if (M_IGEO(mp)->inobt_mxr[0] == 0) return 0; /* @@ -558,11 +557,41 @@ xfs_inobt_max_size( XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) agblocks -= mp->m_sb.sb_logblocks; - return xfs_btree_calc_size(mp->m_inobt_mnr, + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, (uint64_t)agblocks * mp->m_sb.sb_inopblock / XFS_INODES_PER_CHUNK); } +/* Read AGI and create inobt cursor. */ +int +xfs_inobt_cur( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + xfs_btnum_t which, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp) +{ + struct xfs_btree_cur *cur; + int error; + + ASSERT(*agi_bpp == NULL); + ASSERT(*curpp == NULL); + + error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); + if (error) + return error; + + cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); + if (!cur) { + xfs_trans_brelse(tp, *agi_bpp); + *agi_bpp = NULL; + return -ENOMEM; + } + *curpp = cur; + return 0; +} + static int xfs_inobt_count_blocks( struct xfs_mount *mp, @@ -571,15 +600,14 @@ xfs_inobt_count_blocks( xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) { - struct xfs_buf *agbp; - struct xfs_btree_cur *cur; + struct xfs_buf *agbp = NULL; + struct xfs_btree_cur *cur = NULL; int error; - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); if (error) return error; - cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); error = xfs_btree_count_blocks(cur, tree_blocks); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); @@ -619,5 +647,5 @@ xfs_iallocbt_calc_size( struct xfs_mount *mp, unsigned long long len) { - return xfs_btree_calc_size(mp->m_inobt_mnr, len); + return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index ebdd0c6b8766..951305ecaae1 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); +int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, xfs_btnum_t btnum, + struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); #endif /* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index bc690f2409fa..27aa3f2bc4bc 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -3,18 +3,14 @@ * Copyright (c) 2017 Christoph Hellwig. */ -#include <linux/cache.h> -#include <linux/kernel.h> -#include <linux/slab.h> #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap.h" #include "xfs_trace.h" /* diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index e021d5133ccb..28ab3c5255e1 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -10,11 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_cksum.h" #include "xfs_icache.h" #include "xfs_trans.h" #include "xfs_ialloc.h" @@ -33,12 +31,9 @@ xfs_inobp_check( xfs_buf_t *bp) { int i; - int j; xfs_dinode_t *dip; - j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - - for (i = 0; i < j; i++) { + for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) { dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); if (!dip->di_next_unlinked) { xfs_alert(mp, @@ -80,7 +75,7 @@ xfs_inode_buf_verify( struct xfs_buf *bp, bool readahead) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_agnumber_t agno; int i; int ni; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index f9acf1d436f6..bf3e04018246 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -3,10 +3,10 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/log2.h> #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -19,12 +19,10 @@ #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_attr_sf.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2_priv.h" #include "xfs_attr_leaf.h" -#include "xfs_shared.h" kmem_zone_t *xfs_ifork_zone; diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index 1b542ec11d5d..7f55eb3f3653 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -12,9 +12,7 @@ #include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_trans_space.h" -#include "xfs_inode.h" #include "xfs_da_btree.h" -#include "xfs_attr_leaf.h" #include "xfs_bmap_btree.h" /* diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 542aa1475b5f..51bb9bdb0e84 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -9,7 +9,6 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_btree.h" @@ -19,7 +18,6 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_refcount.h" diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 5d9de9b21726..38529dbacd55 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -12,12 +12,10 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_btree.h" -#include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_bit.h" #include "xfs_rmap.h" @@ -203,7 +201,7 @@ STATIC xfs_failaddr_t xfs_refcountbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 8ed885507dd8..e6aeb390b2fb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -10,24 +10,17 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_btree.h" #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_extent_busy.h" -#include "xfs_bmap.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" /* * Lookup the first record less than or equal to [bno, len, owner, offset] diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 5d1f8884c888..fc78efa52c94 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -9,18 +9,14 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_ag_resv.h" @@ -292,7 +288,7 @@ static xfs_failaddr_t xfs_rmapbt_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index eaaff67e9626..8ea1efc97b41 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -13,15 +13,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" -#include "xfs_icache.h" #include "xfs_rtalloc.h" diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index e76a3e5d28d7..a08dd8f40346 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -10,26 +10,19 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" -#include "xfs_bmap.h" #include "xfs_refcount_btree.h" #include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_health.h" /* @@ -686,7 +679,7 @@ xfs_sb_read_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); int error; @@ -752,7 +745,7 @@ xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_sb sb; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; int error; @@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { * * Mount initialization code establishing various mount * fields from the superblock associated with the given - * mount structure + * mount structure. + * + * Inode geometry are calculated in xfs_ialloc_setup_geometry. */ void xfs_sb_mount_common( - struct xfs_mount *mp, - struct xfs_sb *sbp) + struct xfs_mount *mp, + struct xfs_sb *sbp) { mp->m_agfrotor = mp->m_agirotor = 0; mp->m_maxagi = mp->m_sb.sb_agcount; @@ -813,7 +808,6 @@ xfs_sb_mount_common( mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; - mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog; mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; mp->m_blockwmask = mp->m_blockwsize - 1; @@ -823,11 +817,6 @@ xfs_sb_mount_common( mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; - mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1); - mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0); - mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2; - mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2; - mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; @@ -844,14 +833,6 @@ xfs_sb_mount_common( mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); - mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK, - sbp->sb_inopblock); - mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog; - - if (sbp->sb_spino_align) - mp->m_ialloc_min_blks = sbp->sb_spino_align; - else - mp->m_ialloc_min_blks = mp->m_ialloc_blks; mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } @@ -939,7 +920,7 @@ xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); + struct xfs_buf *bp = xfs_trans_getsb(tp, mp); mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); @@ -1005,7 +986,7 @@ xfs_update_secondary_sbs( bp = xfs_buf_get(mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), - XFS_FSS_TO_BB(mp, 1), 0); + XFS_FSS_TO_BB(mp, 1)); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based @@ -1069,7 +1050,7 @@ xfs_sync_sb_buf( if (error) return error; - bp = xfs_trans_getsb(tp, mp, 0); + bp = xfs_trans_getsb(tp, mp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); xfs_trans_set_sync(tp); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 4e909791aeac..e0641b7337b3 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -65,7 +65,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ -#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */ /* * LOWMODE is used by the allocator to activate the lowspace algorithm - when * free space is running low the extent allocator may choose to allocate an @@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); +/* Computed inode geometry for the filesystem. */ +struct xfs_ino_geometry { + /* Maximum inode count in this filesystem. */ + uint64_t maxicount; + + /* Actual inode cluster buffer size, in bytes. */ + unsigned int inode_cluster_size; + + /* + * Desired inode cluster buffer size, in bytes. This value is not + * rounded up to at least one filesystem block, which is necessary for + * the sole purpose of validating sb_spino_align. Runtime code must + * only ever use inode_cluster_size. + */ + unsigned int inode_cluster_size_raw; + + /* Inode cluster sizes, adjusted to be at least 1 fsb. */ + unsigned int inodes_per_cluster; + unsigned int blocks_per_cluster; + + /* Inode cluster alignment. */ + unsigned int cluster_align; + unsigned int cluster_align_inodes; + unsigned int inoalign_mask; /* mask sb_inoalignmt if used */ + + unsigned int inobt_mxr[2]; /* max inobt btree records */ + unsigned int inobt_mnr[2]; /* min inobt btree records */ + unsigned int inobt_maxlevels; /* max inobt btree levels. */ + + /* Size of inode allocations under normal operation. */ + unsigned int ialloc_inos; + unsigned int ialloc_blks; + + /* Minimum inode blocks for a sparse allocation. */ + unsigned int ialloc_min_blks; + + /* stripe unit inode alignment */ + unsigned int ialloc_align; + + unsigned int agino_log; /* #bits for agino in inum */ +}; + +/* Keep iterating the data structure. */ +#define XFS_ITER_CONTINUE (0) + +/* Stop iterating the data structure. */ +#define XFS_ITER_ABORT (1) + #endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index a0ccc253c43d..3b8260ca7d1b 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -11,12 +11,8 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_error.h" -#include "xfs_trace.h" -#include "xfs_symlink.h" -#include "xfs_cksum.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" @@ -90,7 +86,7 @@ static xfs_failaddr_t xfs_symlink_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_dsymlink_hdr *dsl = bp->b_addr; if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -116,7 +112,7 @@ static void xfs_symlink_read_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; /* no verification of non-crc buffers */ @@ -136,7 +132,7 @@ static void xfs_symlink_write_verify( struct xfs_buf *bp) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; xfs_failaddr_t fa; diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 83f4ee2afc49..d12bbd526e7c 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -15,12 +15,10 @@ #include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_bmap_btree.h" -#include "xfs_ialloc.h" #include "xfs_quota.h" #include "xfs_trans.h" #include "xfs_qm.h" #include "xfs_trans_space.h" -#include "xfs_trace.h" #define _ALLOC true #define _FREE false @@ -136,9 +134,10 @@ STATIC uint xfs_calc_inobt_res( struct xfs_mount *mp) { - return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + - xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), - XFS_FSB_TO_B(mp, 1)); + return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels, + XFS_FSB_TO_B(mp, 1)) + + xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), + XFS_FSB_TO_B(mp, 1)); } /* @@ -167,7 +166,7 @@ xfs_calc_finobt_res( * includes: * * the allocation btrees: 2 trees * (max depth - 1) * block size - * the inode chunk: m_ialloc_blks * N + * the inode chunk: m_ino_geo.ialloc_blks * N * * The size N of the inode chunk reservation depends on whether it is for * allocation or free and which type of create transaction is in use. An inode @@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res( size = XFS_FSB_TO_B(mp, 1); } - res += xfs_calc_buf_res(mp->m_ialloc_blks, size); + res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size); return res; } @@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation( struct xfs_mount *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + 2 * M_IGEO(mp)->inode_cluster_size; } /* @@ -345,7 +344,7 @@ STATIC uint xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) { return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + - max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); + M_IGEO(mp)->inode_cluster_size; } /* diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h index a62fb950bef1..88221c7a04cc 100644 --- a/fs/xfs/libxfs/xfs_trans_space.h +++ b/fs/xfs/libxfs/xfs_trans_space.h @@ -56,9 +56,9 @@ #define XFS_DIRREMOVE_SPACE_RES(mp) \ XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) #define XFS_IALLOC_SPACE_RES(mp) \ - ((mp)->m_ialloc_blks + \ + (M_IGEO(mp)->ialloc_blks + \ (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ - ((mp)->m_in_maxlevels - 1))) + (M_IGEO(mp)->inobt_maxlevels - 1))) /* * Space reservation values for various transactions. @@ -94,7 +94,8 @@ #define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) #define XFS_IFREE_SPACE_RES(mp) \ - (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) + (xfs_sb_version_hasfinobt(&mp->m_sb) ? \ + M_IGEO(mp)->inobt_maxlevels : 0) #endif /* __XFS_TRANS_SPACE_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index d51acc95bc00..4f595546a639 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -7,19 +7,10 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_format.h" -#include "xfs_log_format.h" #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_rmap.h" -#include "xfs_alloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" /* Find the size of the AG, in blocks. */ xfs_agblock_t @@ -87,14 +78,14 @@ xfs_agino_range( * Calculate the first inode, which will be in the first * cluster-aligned block after the AGFL. */ - bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align); + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); *first = XFS_AGB_TO_AGINO(mp, bno); /* * Calculate the last inode, which will be at the end of the * last (aligned) cluster that can be allocated in the AG. */ - bno = round_down(eoag, mp->m_cluster_align); + bno = round_down(eoag, M_IGEO(mp)->cluster_align); *last = XFS_AGB_TO_AGINO(mp, bno) - 1; } diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index adaeabdefdd3..16b09b941441 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -9,20 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_rmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Superblock */ @@ -646,7 +639,7 @@ xchk_agfl_block( xchk_agfl_block_xref(sc, agbno); if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - return XFS_BTREE_QUERY_RANGE_ABORT; + return XFS_ITER_ABORT; return 0; } @@ -737,7 +730,7 @@ xchk_agfl( /* Check the blocks in the AGFL. */ error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), sc->sa.agfl_bp, xchk_agfl_block, &sai); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) { + if (error == XFS_ITER_ABORT) { error = 0; goto out_free; } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 64e31f87d490..7a1a38b636a9 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -9,22 +9,17 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 44883e9112ad..a43d1813c4ff 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -9,19 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_alloc.h" #include "xfs_rmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub free space btrees. diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index dce74ec57038..1afc58bf71dd 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -9,26 +9,62 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" -#include "xfs_dir2.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/dabtree.h" -#include "scrub/trace.h" +#include "scrub/attr.h" -#include <linux/posix_acl_xattr.h> -#include <linux/xattr.h> +/* + * Allocate enough memory to hold an attr value and attr block bitmaps, + * reallocating the buffer if necessary. Buffer contents are not preserved + * across a reallocation. + */ +int +xchk_setup_xattr_buf( + struct xfs_scrub *sc, + size_t value_size, + xfs_km_flags_t flags) +{ + size_t sz; + struct xchk_xattr_buf *ab = sc->buf; + + /* + * We need enough space to read an xattr value from the file or enough + * space to hold three copies of the xattr free space bitmap. We don't + * need the buffer space for both purposes at the same time. + */ + sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); + sz = max_t(size_t, sz, value_size); + + /* + * If there's already a buffer, figure out if we need to reallocate it + * to accommodate a larger size. + */ + if (ab) { + if (sz <= ab->sz) + return 0; + kmem_free(ab); + sc->buf = NULL; + } + + /* + * Don't zero the buffer upon allocation to avoid runtime overhead. + * All users must be careful never to read uninitialized contents. + */ + ab = kmem_alloc_large(sizeof(*ab) + sz, flags); + if (!ab) + return -ENOMEM; + + ab->sz = sz; + sc->buf = ab; + return 0; +} /* Set us up to scrub an inode's extended attributes. */ int @@ -36,19 +72,18 @@ xchk_setup_xattr( struct xfs_scrub *sc, struct xfs_inode *ip) { - size_t sz; + int error; /* - * Allocate the buffer without the inode lock held. We need enough - * space to read every xattr value in the file or enough space to - * hold three copies of the xattr free space bitmap. (Not both at - * the same time.) + * We failed to get memory while checking attrs, so this time try to + * get all the memory we're ever going to need. Allocate the buffer + * without the inode lock held, which means we can sleep. */ - sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * - BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); - sc->buf = kmem_zalloc_large(sz, KM_SLEEP); - if (!sc->buf) - return -ENOMEM; + if (sc->flags & XCHK_TRY_HARDER) { + error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP); + if (error) + return error; + } return xchk_setup_inode_contents(sc, ip, 0); } @@ -83,7 +118,7 @@ xchk_xattr_listent( sx = container_of(context, struct xchk_xattr, context); if (xchk_should_terminate(sx->sc, &error)) { - context->seen_enough = 1; + context->seen_enough = error; return; } @@ -99,6 +134,19 @@ xchk_xattr_listent( return; } + /* + * Try to allocate enough memory to extrat the attr value. If that + * doesn't work, we overload the seen_enough variable to convey + * the error message back to the main scrub function. + */ + error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) { + context->seen_enough = error; + return; + } + args.flags = ATTR_KERNOTIME; if (flags & XFS_ATTR_ROOT) args.flags |= ATTR_ROOT; @@ -111,8 +159,8 @@ xchk_xattr_listent( args.namelen = namelen; args.hashval = xfs_da_hashname(args.name, args.namelen); args.trans = context->tp; - args.value = sx->sc->buf; - args.valuelen = XATTR_SIZE_MAX; + args.value = xchk_xattr_valuebuf(sx->sc); + args.valuelen = valuelen; error = xfs_attr_get_ilocked(context->dp, &args); if (error == -EEXIST) @@ -125,7 +173,7 @@ xchk_xattr_listent( args.blkno); fail_xref: if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - context->seen_enough = 1; + context->seen_enough = XFS_ITER_ABORT; return; } @@ -170,13 +218,12 @@ xchk_xattr_check_freemap( unsigned long *map, struct xfs_attr3_icleaf_hdr *leafhdr) { - unsigned long *freemap; - unsigned long *dstmap; + unsigned long *freemap = xchk_xattr_freemap(sc); + unsigned long *dstmap = xchk_xattr_dstmap(sc); unsigned int mapsize = sc->mp->m_attr_geo->blksize; int i; /* Construct bitmap of freemap contents. */ - freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize); bitmap_zero(freemap, mapsize); for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { if (!xchk_xattr_set_map(sc, freemap, @@ -186,7 +233,6 @@ xchk_xattr_check_freemap( } /* Look for bits that are set in freemap and are marked in use. */ - dstmap = freemap + BITS_TO_LONGS(mapsize); return bitmap_and(dstmap, freemap, map, mapsize) == 0; } @@ -201,13 +247,13 @@ xchk_xattr_entry( char *buf_end, struct xfs_attr_leafblock *leaf, struct xfs_attr3_icleaf_hdr *leafhdr, - unsigned long *usedmap, struct xfs_attr_leaf_entry *ent, int idx, unsigned int *usedbytes, __u32 *last_hashval) { struct xfs_mount *mp = ds->state->mp; + unsigned long *usedmap = xchk_xattr_usedmap(ds->sc); char *name_end; struct xfs_attr_leaf_name_local *lentry; struct xfs_attr_leaf_name_remote *rentry; @@ -267,16 +313,26 @@ xchk_xattr_block( struct xfs_attr_leafblock *leaf = bp->b_addr; struct xfs_attr_leaf_entry *ent; struct xfs_attr_leaf_entry *entries; - unsigned long *usedmap = ds->sc->buf; + unsigned long *usedmap; char *buf_end; size_t off; __u32 last_hashval = 0; unsigned int usedbytes = 0; unsigned int hdrsize; int i; + int error; if (*last_checked == blk->blkno) return 0; + + /* Allocate memory for block usage checking. */ + error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + usedmap = xchk_xattr_usedmap(ds->sc); + *last_checked = blk->blkno; bitmap_zero(usedmap, mp->m_attr_geo->blksize); @@ -324,7 +380,7 @@ xchk_xattr_block( /* Check the entry and nameval. */ xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, - usedmap, ent, i, &usedbytes, &last_hashval); + ent, i, &usedbytes, &last_hashval); if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) goto out; @@ -464,6 +520,10 @@ xchk_xattr( error = xfs_attr_list_int_ilocked(&sx.context); if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) goto out; + + /* Did our listent function try to return any errors? */ + if (sx.context.seen_enough < 0) + error = sx.context.seen_enough; out: return error; } diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h new file mode 100644 index 000000000000..13a1d2e8424d --- /dev/null +++ b/fs/xfs/scrub/attr.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_SCRUB_ATTR_H__ +#define __XFS_SCRUB_ATTR_H__ + +/* + * Temporary storage for online scrub and repair of extended attributes. + */ +struct xchk_xattr_buf { + /* Size of @buf, in bytes. */ + size_t sz; + + /* + * Memory buffer -- either used for extracting attr values while + * walking the attributes; or for computing attr block bitmaps when + * checking the attribute tree. + * + * Each bitmap contains enough bits to track every byte in an attr + * block (rounded up to the size of an unsigned long). The attr block + * used space bitmap starts at the beginning of the buffer; the free + * space bitmap follows immediately after; and we have a third buffer + * for storing intermediate bitmap results. + */ + uint8_t buf[0]; +}; + +/* A place to store attribute values. */ +static inline uint8_t * +xchk_xattr_valuebuf( + struct xfs_scrub *sc) +{ + struct xchk_xattr_buf *ab = sc->buf; + + return ab->buf; +} + +/* A bitmap of space usage computed by walking an attr leaf block. */ +static inline unsigned long * +xchk_xattr_usedmap( + struct xfs_scrub *sc) +{ + struct xchk_xattr_buf *ab = sc->buf; + + return (unsigned long *)ab->buf; +} + +/* A bitmap of free space computed by walking attr leaf block free info. */ +static inline unsigned long * +xchk_xattr_freemap( + struct xfs_scrub *sc) +{ + return xchk_xattr_usedmap(sc) + + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); +} + +/* A bitmap used to hold temporary results. */ +static inline unsigned long * +xchk_xattr_dstmap( + struct xfs_scrub *sc) +{ + return xchk_xattr_freemap(sc) + + BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); +} + +int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size, + xfs_km_flags_t flags); + +#endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c index fdadc9e1dc49..3d47d111be5a 100644 --- a/fs/xfs/scrub/bitmap.c +++ b/fs/xfs/scrub/bitmap.c @@ -10,11 +10,6 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_btree.h" -#include "scrub/xfs_scrub.h" -#include "scrub/scrub.h" -#include "scrub/common.h" -#include "scrub/trace.h" -#include "scrub/repair.h" #include "scrub/bitmap.h" /* diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index a703cd58a90e..1bd29fdc2ab5 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -9,27 +9,19 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_alloc.h" -#include "xfs_rtalloc.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* Set us up with an inode's bmap. */ int diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 117910db51b8..f52a7b8256f9 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -9,14 +9,7 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 973aa59975e3..18876056e5e0 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -9,22 +9,16 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" -#include "xfs_itable.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" @@ -32,11 +26,9 @@ #include "xfs_trans_priv.h" #include "xfs_attr.h" #include "xfs_reflink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/btree.h" #include "scrub/repair.h" #include "scrub/health.h" diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index 90527b094878..94c4f1de1922 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -9,20 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_attr_leaf.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index a38a22785a1a..1e2e11721eb9 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -9,24 +9,14 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" -#include "xfs_itable.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" #include "scrub/dabtree.h" /* Set us up to scrub directories. */ diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 07c11e3e6437..fc3f510c9034 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -9,22 +9,10 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" -#include "xfs_rmap.h" -#include "xfs_error.h" -#include "xfs_errortag.h" -#include "xfs_icache.h" #include "xfs_health.h" -#include "xfs_bmap.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 23cf8e2f25db..b2f602811e9d 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -7,18 +7,10 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" #include "xfs_sb.h" -#include "xfs_inode.h" #include "xfs_health.h" #include "scrub/scrub.h" -#include "scrub/health.h" /* * Scrub and In-Core Filesystem Health Assessments diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 9b47117180cb..681758704fda 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -9,21 +9,14 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_icache.h" #include "xfs_rmap.h" -#include "xfs_log.h" -#include "xfs_trans_priv.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" @@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster( int error = 0; nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, - mp->m_inodes_per_cluster); + M_IGEO(mp)->inodes_per_cluster); /* Map this inode cluster */ agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); @@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster( */ ir_holemask = (irec->ir_holemask & cluster_mask); imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); - imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); + imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster); imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << mp->m_sb.sb_inodelog; @@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster( /* If any part of this is a hole, skip it. */ if (ir_holemask) { xchk_xref_is_not_owned_by(bs->sc, agbno, - mp->m_blocks_per_cluster, + M_IGEO(mp)->blocks_per_cluster, &XFS_RMAP_OINFO_INODES); return 0; } - xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, + xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster, &XFS_RMAP_OINFO_INODES); /* Grab the inode cluster buffer. */ @@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters( */ for (cluster_base = 0; cluster_base < XFS_INODES_PER_CHUNK; - cluster_base += bs->sc->mp->m_inodes_per_cluster) { + cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) { error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); if (error) break; @@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment( { struct xfs_mount *mp = bs->sc->mp; struct xchk_iallocbt *iabt = bs->private; + struct xfs_ino_geometry *igeo = M_IGEO(mp); /* * finobt records have different positioning requirements than inobt @@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment( unsigned int imask; imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, - mp->m_cluster_align_inodes) - 1; + igeo->cluster_align_inodes) - 1; if (irec->ir_startino & imask) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; @@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment( } /* inobt records must be aligned to cluster and inoalignmnt size. */ - if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) { + if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; } - if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) { + if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); return; } - if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK) + if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK) return; /* @@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment( * after this one. */ iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; - iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster; + iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster; } /* Scrub an inobt/finobt record. */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index e213efc194a1..6d483ab29e63 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -9,27 +9,17 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" -#include "xfs_inode_buf.h" -#include "xfs_inode_fork.h" #include "xfs_ialloc.h" #include "xfs_da_format.h" #include "xfs_reflink.h" #include "xfs_rmap.h" -#include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Grab total control of the inode metadata. It doesn't matter here if diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index d5d197f1b80f..c962bd534690 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -9,21 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_ialloc.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up to scrub parents. */ int diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 5dfe2b5924db..0a33b4421c32 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -9,24 +9,13 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_qm.h" -#include "xfs_dquot.h" -#include "xfs_dquot_item.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Convert a scrub type code to a DQ flag, or return 0 if error. */ static inline uint @@ -144,7 +133,7 @@ xchk_quota_item( if (bsoft > bhard) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); - if (ihard > mp->m_maxicount) + if (ihard > M_IGEO(mp)->maxicount) xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); if (isoft > ihard) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 708b4158eb90..93b3793bc5b3 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -7,22 +7,12 @@ #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" #include "xfs_rmap.h" #include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub reference count btrees. diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index eb358f0f5e0a..4cfeec57fb05 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -9,29 +9,21 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" #include "xfs_alloc.h" #include "xfs_alloc_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" -#include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_extent_busy.h" #include "xfs_ag_resv.h" -#include "xfs_trans_space.h" #include "xfs_quota.h" -#include "xfs_attr.h" -#include "xfs_reflink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -357,7 +349,7 @@ xrep_init_btblock( bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); - xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0); + xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(tp, bp, 0, bp->b_length); bp->b_ops = ops; @@ -672,7 +664,7 @@ xrep_findroot_agfl_walk( { xfs_agblock_t *agbno = priv; - return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; + return (*agbno == bno) ? XFS_ITER_ABORT : 0; } /* Does this block match the btree information passed in? */ @@ -702,7 +694,7 @@ xrep_findroot_block( if (owner == XFS_RMAP_OWN_AG) { error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, xrep_findroot_agfl_walk, &agbno); - if (error == XFS_BTREE_QUERY_RANGE_ABORT) + if (error == XFS_ITER_ABORT) return 0; if (error) return error; diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 92a140c5b55e..8d4cefd761c1 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -9,21 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_btree.h" -#include "xfs_bit.h" -#include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" #include "xfs_rmap.h" #include "xfs_refcount.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" -#include "scrub/trace.h" /* * Set us up to scrub reverse mapping btrees. diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index dbe115b075f7..c642bc206c41 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -9,19 +9,12 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_inode.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up with the realtime metadata locked. */ int diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index f630389ee176..15c8c5f3f688 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -9,36 +9,16 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_icache.h" -#include "xfs_itable.h" -#include "xfs_alloc.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc.h" -#include "xfs_ialloc_btree.h" -#include "xfs_refcount.h" -#include "xfs_refcount_btree.h" -#include "xfs_rmap.h" -#include "xfs_rmap_btree.h" #include "xfs_quota.h" #include "xfs_qm.h" #include "xfs_errortag.h" #include "xfs_error.h" -#include "xfs_log.h" -#include "xfs_trans_priv.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/btree.h" #include "scrub/repair.h" #include "scrub/health.h" diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c index f7ebaa946999..99c0b1234c3c 100644 --- a/fs/xfs/scrub/symlink.c +++ b/fs/xfs/scrub/symlink.c @@ -9,19 +9,11 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_btree.h" -#include "xfs_bit.h" #include "xfs_log_format.h" -#include "xfs_trans.h" -#include "xfs_sb.h" #include "xfs_inode.h" -#include "xfs_inode_fork.h" #include "xfs_symlink.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" -#include "scrub/trace.h" /* Set us up to scrub a symbolic link. */ int diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 96feaf8dcdec..9eaab2eb5ed3 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -10,15 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_trans.h" -#include "xfs_bit.h" -#include "scrub/xfs_scrub.h" #include "scrub/scrub.h" -#include "scrub/common.h" /* Figure out which block the btree cursor was pointing to. */ static inline xfs_fsblock_t diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 8039e35147dd..cbda40d40326 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -4,16 +4,14 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_acl.h" #include "xfs_attr.h" #include "xfs_trace.h" -#include <linux/slab.h> -#include <linux/xattr.h> #include <linux/posix_acl_xattr.h> diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 11f703d4a605..761248ee2778 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -12,16 +12,11 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_bmap_btree.h" #include "xfs_reflink.h" -#include <linux/writeback.h> /* * structure owned by writepages passed to individual writepage calls @@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc( struct xfs_trans *tp; int error; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, - XFS_TRANS_NOFS, &tp); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); if (error) return error; @@ -240,9 +234,17 @@ xfs_end_ioend( struct xfs_inode *ip = XFS_I(ioend->io_inode); xfs_off_t offset = ioend->io_offset; size_t size = ioend->io_size; + unsigned int nofs_flag; int error; /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + + /* * Just clean up the in-memory strutures if the fs has been shut down. */ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { @@ -282,6 +284,8 @@ done: list_del_init(&ioend->io_list); xfs_destroy_ioend(ioend, error); } + + memalloc_nofs_restore(nofs_flag); } /* @@ -290,13 +294,9 @@ done: static bool xfs_ioend_can_merge( struct xfs_ioend *ioend, - int ioend_error, struct xfs_ioend *next) { - int next_error; - - next_error = blk_status_to_errno(next->io_bio->bi_status); - if (ioend_error != next_error) + if (ioend->io_bio->bi_status != next->io_bio->bi_status) return false; if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) return false; @@ -305,11 +305,28 @@ xfs_ioend_can_merge( return false; if (ioend->io_offset + ioend->io_size != next->io_offset) return false; - if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next)) - return false; return true; } +/* + * If the to be merged ioend has a preallocated transaction for file + * size updates we need to ensure the ioend it is merged into also + * has one. If it already has one we can simply cancel the transaction + * as it is guaranteed to be clean. + */ +static void +xfs_ioend_merge_append_transactions( + struct xfs_ioend *ioend, + struct xfs_ioend *next) +{ + if (!ioend->io_append_trans) { + ioend->io_append_trans = next->io_append_trans; + next->io_append_trans = NULL; + } else { + xfs_setfilesize_ioend(next, -ECANCELED); + } +} + /* Try to merge adjacent completions. */ STATIC void xfs_ioend_try_merge( @@ -317,25 +334,16 @@ xfs_ioend_try_merge( struct list_head *more_ioends) { struct xfs_ioend *next_ioend; - int ioend_error; - int error; - - if (list_empty(more_ioends)) - return; - - ioend_error = blk_status_to_errno(ioend->io_bio->bi_status); while (!list_empty(more_ioends)) { next_ioend = list_first_entry(more_ioends, struct xfs_ioend, io_list); - if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend)) + if (!xfs_ioend_can_merge(ioend, next_ioend)) break; list_move_tail(&next_ioend->io_list, &ioend->io_list); ioend->io_size += next_ioend->io_size; - if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(next_ioend, 1); - ASSERT(error == 1); - } + if (next_ioend->io_append_trans) + xfs_ioend_merge_append_transactions(ioend, next_ioend); } } @@ -626,7 +634,7 @@ allocate_blocks: * reference to the ioend to ensure that the ioend completion is only done once * all bios have been submitted and the ioend is really done. * - * If @fail is non-zero, it means that we have a situation where some part of + * If @status is non-zero, it means that we have a situation where some part of * the submission process has failed after we have marked paged for writeback * and unlocked them. In this situation, we need to fail the bio and ioend * rather than submit it to IO. This typically only happens on a filesystem @@ -638,21 +646,19 @@ xfs_submit_ioend( struct xfs_ioend *ioend, int status) { + unsigned int nofs_flag; + + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + /* Convert CoW extents to regular */ if (!status && ioend->io_fork == XFS_COW_FORK) { - /* - * Yuk. This can do memory allocation, but is not a - * transactional operation so everything is done in GFP_KERNEL - * context. That can deadlock, because we hold pages in - * writeback state and GFP_KERNEL allocations can block on them. - * Hence we must operate in nofs conditions here. - */ - unsigned nofs_flag; - - nofs_flag = memalloc_nofs_save(); status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), ioend->io_offset, ioend->io_size); - memalloc_nofs_restore(nofs_flag); } /* Reserve log space if we might write beyond the on-disk inode size. */ @@ -663,9 +669,10 @@ xfs_submit_ioend( !ioend->io_append_trans) status = xfs_setfilesize_trans_alloc(ioend); + memalloc_nofs_restore(nofs_flag); + ioend->io_bio->bi_private = ioend; ioend->io_bio->bi_end_io = xfs_end_bio; - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); /* * If we are failing the IO now, just mark the ioend with an @@ -679,7 +686,6 @@ xfs_submit_ioend( return status; } - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; submit_bio(ioend->io_bio); return 0; } @@ -691,7 +697,8 @@ xfs_alloc_ioend( xfs_exntst_t state, xfs_off_t offset, struct block_device *bdev, - sector_t sector) + sector_t sector, + struct writeback_control *wbc) { struct xfs_ioend *ioend; struct bio *bio; @@ -699,6 +706,9 @@ xfs_alloc_ioend( bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = sector; + bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); + bio->bi_write_hint = inode->i_write_hint; + wbc_init_bio(wbc, bio); ioend = container_of(bio, struct xfs_ioend, io_inline_bio); INIT_LIST_HEAD(&ioend->io_list); @@ -719,24 +729,22 @@ xfs_alloc_ioend( * so that the bi_private linkage is set up in the right direction for the * traversal in xfs_destroy_ioend(). */ -static void +static struct bio * xfs_chain_bio( - struct xfs_ioend *ioend, - struct writeback_control *wbc, - struct block_device *bdev, - sector_t sector) + struct bio *prev) { struct bio *new; new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); - bio_set_dev(new, bdev); - new->bi_iter.bi_sector = sector; - bio_chain(ioend->io_bio, new); - bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ - ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); - ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; - submit_bio(ioend->io_bio); - ioend->io_bio = new; + bio_copy_dev(new, prev);/* also copies over blkcg information */ + new->bi_iter.bi_sector = bio_end_sector(prev); + new->bi_opf = prev->bi_opf; + new->bi_write_hint = prev->bi_write_hint; + + bio_chain(prev, new); + bio_get(prev); /* for xfs_destroy_ioend */ + submit_bio(prev); + return new; } /* @@ -772,7 +780,7 @@ xfs_add_to_ioend( if (wpc->ioend) list_add(&wpc->ioend->io_list, iolist); wpc->ioend = xfs_alloc_ioend(inode, wpc->fork, - wpc->imap.br_state, offset, bdev, sector); + wpc->imap.br_state, offset, bdev, sector, wbc); } merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, @@ -783,11 +791,12 @@ xfs_add_to_ioend( if (!merged) { if (bio_full(wpc->ioend->io_bio, len)) - xfs_chain_bio(wpc->ioend, wbc, bdev, sector); + wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio); bio_add_page(wpc->ioend->io_bio, page, len, poff); } wpc->ioend->io_size += len; + wbc_account_io(wbc, page, len); } STATIC void diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h index f62b03186c62..45a1ea240cbb 100644 --- a/fs/xfs/xfs_aops.h +++ b/fs/xfs/xfs_aops.h @@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops; int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); -extern void xfs_count_page_state(struct page *, int *, int *); extern struct block_device *xfs_find_bdev_for_inode(struct inode *); extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 228821b2ebe0..dc93c51c17de 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -15,18 +15,13 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" -#include "xfs_alloc.h" #include "xfs_attr_remote.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_leaf.h" -#include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_trace.h" #include "xfs_dir2.h" -#include "xfs_defer.h" /* * Look at all the extents for this logical region, @@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive( int size; int tmp; int i; - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; leaf = bp->b_addr; xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 3d213a7394c5..58fc820a70c6 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -6,25 +6,20 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_attr.h" #include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" #include "xfs_attr_leaf.h" #include "xfs_error.h" #include "xfs_trace.h" -#include "xfs_buf_item.h" -#include "xfs_cksum.h" #include "xfs_dir2.h" STATIC int diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c new file mode 100644 index 000000000000..e2148f2d5d6b --- /dev/null +++ b/fs/xfs/xfs_bio_io.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Christoph Hellwig. + */ +#include "xfs.h" + +static inline unsigned int bio_max_vecs(unsigned int count) +{ + return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); +} + +int +xfs_rw_bdev( + struct block_device *bdev, + sector_t sector, + unsigned int count, + char *data, + unsigned int op) + +{ + unsigned int is_vmalloc = is_vmalloc_addr(data); + unsigned int left = count; + int error; + struct bio *bio; + + if (is_vmalloc && op == REQ_OP_WRITE) + flush_kernel_vmap_range(data, count); + + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = sector; + bio->bi_opf = op | REQ_META | REQ_SYNC; + + do { + struct page *page = kmem_to_page(data); + unsigned int off = offset_in_page(data); + unsigned int len = min_t(unsigned, left, PAGE_SIZE - off); + + while (bio_add_page(bio, page, len, off) != len) { + struct bio *prev = bio; + + bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left)); + bio_copy_dev(bio, prev); + bio->bi_iter.bi_sector = bio_end_sector(prev); + bio->bi_opf = prev->bi_opf; + bio_chain(prev, bio); + + submit_bio(prev); + } + + data += len; + left -= len; + } while (left > 0); + + error = submit_bio_wait(bio); + bio_put(bio); + + if (is_vmalloc && op == REQ_OP_READ) + invalidate_kernel_vmap_range(data, count); + return error; +} diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index ce45f066995e..9fa4a7ee8cfc 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -9,17 +9,16 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_bmap_item.h" #include "xfs_log.h" #include "xfs_bmap.h" #include "xfs_icache.h" -#include "xfs_trace.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" @@ -96,15 +95,6 @@ xfs_bui_item_format( } /* - * Pinning has no meaning for an bui item, so just return. - */ -STATIC void -xfs_bui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an BUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the BUI transaction has been successfully committed to make it @@ -123,71 +113,22 @@ xfs_bui_item_unpin( } /* - * BUI items have no locking or pushing. However, since BUIs are pulled from - * the AIL when their corresponding BUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the BUI out of - * the AIL. - */ -STATIC uint -xfs_bui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The BUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an BUD isn't going to be * constructed and thus we free the BUI here directly. */ STATIC void -xfs_bui_item_unlock( +xfs_bui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_bui_release(BUI_ITEM(lip)); -} - -/* - * The BUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_bui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; + xfs_bui_release(BUI_ITEM(lip)); } -/* - * The BUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_bui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all bui log items. - */ static const struct xfs_item_ops xfs_bui_item_ops = { .iop_size = xfs_bui_item_size, .iop_format = xfs_bui_item_format, - .iop_pin = xfs_bui_item_pin, .iop_unpin = xfs_bui_item_unpin, - .iop_unlock = xfs_bui_item_unlock, - .iop_committed = xfs_bui_item_committed, - .iop_push = xfs_bui_item_push, - .iop_committing = xfs_bui_item_committing, + .iop_release = xfs_bui_item_release, }; /* @@ -249,126 +190,241 @@ xfs_bud_item_format( } /* - * Pinning has no meaning for an bud item, so just return. + * The BUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the BUI and free the + * BUD. */ STATIC void -xfs_bud_item_pin( +xfs_bud_item_release( struct xfs_log_item *lip) { + struct xfs_bud_log_item *budp = BUD_ITEM(lip); + + xfs_bui_release(budp->bud_buip); + kmem_zone_free(xfs_bud_zone, budp); } -/* - * Since pinning has no meaning for an bud item, unpinning does - * not either. - */ -STATIC void -xfs_bud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_bud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_bud_item_size, + .iop_format = xfs_bud_item_format, + .iop_release = xfs_bud_item_release, +}; + +static struct xfs_bud_log_item * +xfs_trans_get_bud( + struct xfs_trans *tp, + struct xfs_bui_log_item *buip) { + struct xfs_bud_log_item *budp; + + budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD, + &xfs_bud_item_ops); + budp->bud_buip = buip; + budp->bud_format.bud_bui_id = buip->bui_format.bui_id; + + xfs_trans_add_item(tp, &budp->bud_item); + return budp; } /* - * There isn't much you can do to push on an bud item. It is simply stuck - * waiting for the log to be flushed to disk. + * Finish an bmap update and log it to the BUD. Note that the + * transaction is marked dirty regardless of whether the bmap update + * succeeds or fails to support the BUI/BUD lifecycle rules. */ -STATIC uint -xfs_bud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_log_finish_bmap_update( + struct xfs_trans *tp, + struct xfs_bud_log_item *budp, + enum xfs_bmap_intent_type type, + struct xfs_inode *ip, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t *blockcount, + xfs_exntst_t state) { - return XFS_ITEM_PINNED; + int error; + + error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, + startblock, blockcount, state); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the BUI and frees the BUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); + + return error; } -/* - * The BUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the BUI and free the - * BUD. - */ -STATIC void -xfs_bud_item_unlock( - struct xfs_log_item *lip) +/* Sort bmap intents by inode. */ +static int +xfs_bmap_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_bud_log_item *budp = BUD_ITEM(lip); + struct xfs_bmap_intent *ba; + struct xfs_bmap_intent *bb; - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_bui_release(budp->bud_buip); - kmem_zone_free(xfs_bud_zone, budp); - } + ba = container_of(a, struct xfs_bmap_intent, bi_list); + bb = container_of(b, struct xfs_bmap_intent, bi_list); + return ba->bi_owner->i_ino - bb->bi_owner->i_ino; } -/* - * When the bud item is committed to disk, all we need to do is delete our - * reference to our partner bui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_bud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Get an BUI. */ +STATIC void * +xfs_bmap_update_create_intent( + struct xfs_trans *tp, + unsigned int count) { - struct xfs_bud_log_item *budp = BUD_ITEM(lip); + struct xfs_bui_log_item *buip; + + ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); + ASSERT(tp != NULL); + + buip = xfs_bui_init(tp->t_mountp); + ASSERT(buip != NULL); /* - * Drop the BUI reference regardless of whether the BUD has been - * aborted. Once the BUD transaction is constructed, it is the sole - * responsibility of the BUD to release the BUI (even if the BUI is - * aborted due to log I/O error). + * Get a log_item_desc to point at the new item. */ - xfs_bui_release(budp->bud_buip); - kmem_zone_free(xfs_bud_zone, budp); + xfs_trans_add_item(tp, &buip->bui_item); + return buip; +} - return (xfs_lsn_t)-1; +/* Set the map extent flags for this mapping. */ +static void +xfs_trans_set_bmap_flags( + struct xfs_map_extent *bmap, + enum xfs_bmap_intent_type type, + int whichfork, + xfs_exntst_t state) +{ + bmap->me_flags = 0; + switch (type) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + bmap->me_flags = type; + break; + default: + ASSERT(0); + } + if (state == XFS_EXT_UNWRITTEN) + bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; + if (whichfork == XFS_ATTR_FORK) + bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; } -/* - * The BUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Log bmap updates in the intent item. */ STATIC void -xfs_bud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_bmap_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { + struct xfs_bui_log_item *buip = intent; + struct xfs_bmap_intent *bmap; + uint next_extent; + struct xfs_map_extent *map; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; + ASSERT(next_extent < buip->bui_format.bui_nextents); + map = &buip->bui_format.bui_extents[next_extent]; + map->me_owner = bmap->bi_owner->i_ino; + map->me_startblock = bmap->bi_bmap.br_startblock; + map->me_startoff = bmap->bi_bmap.br_startoff; + map->me_len = bmap->bi_bmap.br_blockcount; + xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, + bmap->bi_bmap.br_state); } -/* - * This is the ops vector shared by all bud log items. - */ -static const struct xfs_item_ops xfs_bud_item_ops = { - .iop_size = xfs_bud_item_size, - .iop_format = xfs_bud_item_format, - .iop_pin = xfs_bud_item_pin, - .iop_unpin = xfs_bud_item_unpin, - .iop_unlock = xfs_bud_item_unlock, - .iop_committed = xfs_bud_item_committed, - .iop_push = xfs_bud_item_push, - .iop_committing = xfs_bud_item_committing, -}; +/* Get an BUD so we can process all the deferred rmap updates. */ +STATIC void * +xfs_bmap_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_bud(tp, intent); +} -/* - * Allocate and initialize an bud item with the given number of extents. - */ -struct xfs_bud_log_item * -xfs_bud_init( - struct xfs_mount *mp, - struct xfs_bui_log_item *buip) +/* Process a deferred rmap update. */ +STATIC int +xfs_bmap_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_bmap_intent *bmap; + xfs_filblks_t count; + int error; + + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + count = bmap->bi_bmap.br_blockcount; + error = xfs_trans_log_finish_bmap_update(tp, done_item, + bmap->bi_type, + bmap->bi_owner, bmap->bi_whichfork, + bmap->bi_bmap.br_startoff, + bmap->bi_bmap.br_startblock, + &count, + bmap->bi_bmap.br_state); + if (!error && count > 0) { + ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); + bmap->bi_bmap.br_blockcount = count; + return -EAGAIN; + } + kmem_free(bmap); + return error; +} +/* Abort all pending BUIs. */ +STATIC void +xfs_bmap_update_abort_intent( + void *intent) { - struct xfs_bud_log_item *budp; + xfs_bui_release(intent); +} - budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); - xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); - budp->bud_buip = buip; - budp->bud_format.bud_bui_id = buip->bui_format.bui_id; +/* Cancel a deferred rmap update. */ +STATIC void +xfs_bmap_update_cancel_item( + struct list_head *item) +{ + struct xfs_bmap_intent *bmap; - return budp; + bmap = container_of(item, struct xfs_bmap_intent, bi_list); + kmem_free(bmap); } +const struct xfs_defer_op_type xfs_bmap_update_defer_type = { + .max_items = XFS_BUI_MAX_FAST_EXTENTS, + .diff_items = xfs_bmap_update_diff_items, + .create_intent = xfs_bmap_update_create_intent, + .abort_intent = xfs_bmap_update_abort_intent, + .log_item = xfs_bmap_update_log_item, + .create_done = xfs_bmap_update_create_done, + .finish_item = xfs_bmap_update_finish_item, + .cancel_item = xfs_bmap_update_cancel_item, +}; + /* * Process a bmap update intent item that was recovered from the log. * We need to update some inode's bmbt. diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index 89e043a88bb8..ad479cc73de8 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -75,8 +75,6 @@ extern struct kmem_zone *xfs_bui_zone; extern struct kmem_zone *xfs_bud_zone; struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); -struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, - struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 06d07f1e310b..98c6a7a71427 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -12,12 +12,10 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" -#include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -28,11 +26,8 @@ #include "xfs_trans_space.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_log.h" -#include "xfs_rmap_btree.h" #include "xfs_iomap.h" #include "xfs_reflink.h" -#include "xfs_refcount.h" /* Kernel only BMAP related definitions and functions */ @@ -276,7 +271,7 @@ xfs_bmap_count_tree( struct xfs_btree_block *block, *nextblock; int numrecs; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) return error; @@ -287,7 +282,7 @@ xfs_bmap_count_tree( /* Not at node above leaves, count this level of nodes */ nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); while (nextbno != NULLFSBLOCK) { - error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, + error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) @@ -321,7 +316,7 @@ xfs_bmap_count_tree( if (nextbno == NULLFSBLOCK) break; bno = nextbno; - error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, + error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); if (error) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 548344e25128..ca0849043f54 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -4,24 +4,9 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/stddef.h> -#include <linux/errno.h> -#include <linux/gfp.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/vmalloc.h> -#include <linux/bio.h> -#include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/workqueue.h> -#include <linux/percpu.h> -#include <linux/blkdev.h> -#include <linux/hash.h> -#include <linux/kthread.h> -#include <linux/migrate.h> #include <linux/backing-dev.h> -#include <linux/freezer.h> +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -213,7 +198,7 @@ xfs_buf_free_maps( } } -struct xfs_buf * +static struct xfs_buf * _xfs_buf_alloc( struct xfs_buftarg *target, struct xfs_buf_map *map, @@ -243,6 +228,7 @@ _xfs_buf_alloc( sema_init(&bp->b_sema, 0); /* held, no waiters */ spin_lock_init(&bp->b_lock); bp->b_target = target; + bp->b_mount = target->bt_mount; bp->b_flags = flags; /* @@ -263,12 +249,11 @@ _xfs_buf_alloc( bp->b_maps[i].bm_len = map[i].bm_len; bp->b_length += map[i].bm_len; } - bp->b_io_length = bp->b_length; atomic_set(&bp->b_pin_count, 0); init_waitqueue_head(&bp->b_waiters); - XFS_STATS_INC(target->bt_mount, xb_create); + XFS_STATS_INC(bp->b_mount, xb_create); trace_xfs_buf_init(bp, _RET_IP_); return bp; @@ -425,12 +410,12 @@ retry: current->comm, current->pid, __func__, gfp_mask); - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); + XFS_STATS_INC(bp->b_mount, xb_page_retries); congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } - XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); + XFS_STATS_INC(bp->b_mount, xb_page_found); nbytes = min_t(size_t, size, PAGE_SIZE - offset); size -= nbytes; @@ -909,83 +894,6 @@ xfs_buf_read_uncached( return 0; } -/* - * Return a buffer allocated as an empty buffer and associated to external - * memory via xfs_buf_associate_memory() back to it's empty state. - */ -void -xfs_buf_set_empty( - struct xfs_buf *bp, - size_t numblks) -{ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_page_count = 0; - bp->b_addr = NULL; - bp->b_length = numblks; - bp->b_io_length = numblks; - - ASSERT(bp->b_map_count == 1); - bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL; - bp->b_maps[0].bm_len = bp->b_length; -} - -static inline struct page * -mem_to_page( - void *addr) -{ - if ((!is_vmalloc_addr(addr))) { - return virt_to_page(addr); - } else { - return vmalloc_to_page(addr); - } -} - -int -xfs_buf_associate_memory( - xfs_buf_t *bp, - void *mem, - size_t len) -{ - int rval; - int i = 0; - unsigned long pageaddr; - unsigned long offset; - size_t buflen; - int page_count; - - pageaddr = (unsigned long)mem & PAGE_MASK; - offset = (unsigned long)mem - pageaddr; - buflen = PAGE_ALIGN(len + offset); - page_count = buflen >> PAGE_SHIFT; - - /* Free any previous set of page pointers */ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_addr = mem; - - rval = _xfs_buf_get_pages(bp, page_count); - if (rval) - return rval; - - bp->b_offset = offset; - - for (i = 0; i < bp->b_page_count; i++) { - bp->b_pages[i] = mem_to_page((void *)pageaddr); - pageaddr += PAGE_SIZE; - } - - bp->b_io_length = BTOBB(len); - bp->b_length = BTOBB(buflen); - - return 0; -} - xfs_buf_t * xfs_buf_get_uncached( struct xfs_buftarg *target, @@ -1180,7 +1088,7 @@ xfs_buf_lock( trace_xfs_buf_lock(bp, _RET_IP_); if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); + xfs_log_force(bp->b_mount, 0); down(&bp->b_sema); trace_xfs_buf_lock_done(bp, _RET_IP_); @@ -1269,7 +1177,7 @@ xfs_buf_ioend_async( struct xfs_buf *bp) { INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); - queue_work(bp->b_ioend_wq, &bp->b_ioend_work); + queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work); } void @@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert( struct xfs_buf *bp, const char *func) { - xfs_alert(bp->b_target->bt_mount, + xfs_alert(bp->b_mount, "metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, -bp->b_error); @@ -1307,10 +1215,8 @@ xfs_bwrite( XBF_WRITE_FAIL | XBF_DONE); error = xfs_buf_submit(bp); - if (error) { - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); - } + if (error) + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); return error; } @@ -1436,21 +1342,8 @@ _xfs_buf_ioapply( */ bp->b_error = 0; - /* - * Initialize the I/O completion workqueue if we haven't yet or the - * submitter has not opted to specify a custom one. - */ - if (!bp->b_ioend_wq) - bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue; - if (bp->b_flags & XBF_WRITE) { op = REQ_OP_WRITE; - if (bp->b_flags & XBF_SYNCIO) - op_flags = REQ_SYNC; - if (bp->b_flags & XBF_FUA) - op_flags |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - op_flags |= REQ_PREFLUSH; /* * Run the write verifier callback function if it exists. If @@ -1460,12 +1353,12 @@ _xfs_buf_ioapply( if (bp->b_ops) { bp->b_ops->verify_write(bp); if (bp->b_error) { - xfs_force_shutdown(bp->b_target->bt_mount, + xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE); return; } } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; /* * non-crc filesystems don't attach verifiers during @@ -1497,7 +1390,7 @@ _xfs_buf_ioapply( * subsequent call. */ offset = bp->b_offset; - size = BBTOB(bp->b_io_length); + size = BBTOB(bp->b_length); blk_start_plug(&plug); for (i = 0; i < bp->b_map_count; i++) { xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); @@ -1543,7 +1436,7 @@ __xfs_buf_submit( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); /* on shutdown we stale and complete the buffer immediately */ - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buf_ioerror(bp, -EIO); bp->b_flags &= ~XBF_DONE; xfs_buf_stale(bp); @@ -1613,16 +1506,11 @@ xfs_buf_offset( return page_address(page) + (offset & (PAGE_SIZE-1)); } -/* - * Move data into or out of a buffer. - */ void -xfs_buf_iomove( - xfs_buf_t *bp, /* buffer to process */ - size_t boff, /* starting buffer offset */ - size_t bsize, /* length to copy */ - void *data, /* data address */ - xfs_buf_rw_t mode) /* read/write/zero flag */ +xfs_buf_zero( + struct xfs_buf *bp, + size_t boff, + size_t bsize) { size_t bend; @@ -1635,23 +1523,13 @@ xfs_buf_iomove( page_offset = (boff + bp->b_offset) & ~PAGE_MASK; page = bp->b_pages[page_index]; csize = min_t(size_t, PAGE_SIZE - page_offset, - BBTOB(bp->b_io_length) - boff); + BBTOB(bp->b_length) - boff); ASSERT((csize + page_offset) <= PAGE_SIZE); - switch (mode) { - case XBRW_ZERO: - memset(page_address(page) + page_offset, 0, csize); - break; - case XBRW_READ: - memcpy(data, page_address(page) + page_offset, csize); - break; - case XBRW_WRITE: - memcpy(page_address(page) + page_offset, data, csize); - } + memset(page_address(page) + page_offset, 0, csize); boff += csize; - data += csize; } } @@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref) * This allows userspace to disrupt buffer caching for debug/testing * purposes. */ - if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, - XFS_ERRTAG_BUF_LRU_REF)) + if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF)) lru_ref = 0; atomic_set(&bp->b_lru_ref, lru_ref); @@ -2215,7 +2092,7 @@ xfs_verify_magic( struct xfs_buf *bp, __be32 dmagic) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; int idx; idx = xfs_sb_version_hascrc(&mp->m_sb); @@ -2233,7 +2110,7 @@ xfs_verify_magic16( struct xfs_buf *bp, __be16 dmagic) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; int idx; idx = xfs_sb_version_hascrc(&mp->m_sb); diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index d0b96e071cec..c6e57a3f409e 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -21,12 +21,6 @@ #define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) -typedef enum { - XBRW_READ = 1, /* transfer into target memory */ - XBRW_WRITE = 2, /* transfer from target memory */ - XBRW_ZERO = 3, /* Zero target memory */ -} xfs_buf_rw_t; - #define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ @@ -34,12 +28,7 @@ typedef enum { #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ -#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ +#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */ /* flags used only as arguments to access routines */ #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ @@ -49,7 +38,6 @@ typedef enum { #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ -#define _XBF_COMPOUND (1 << 23)/* compound buffer */ typedef unsigned int xfs_buf_flags_t; @@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t; { XBF_DONE, "DONE" }, \ { XBF_STALE, "STALE" }, \ { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" }, \ - { _XBF_COMPOUND, "COMPOUND" } + { _XBF_DELWRI_Q, "DELWRI_Q" } /* @@ -161,13 +145,13 @@ typedef struct xfs_buf { wait_queue_head_t b_waiters; /* unpin waiters */ struct list_head b_list; struct xfs_perag *b_pag; /* contains rbtree root */ + struct xfs_mount *b_mount; xfs_buftarg_t *b_target; /* buffer target (device) */ void *b_addr; /* virtual address of buffer */ struct work_struct b_ioend_work; - struct workqueue_struct *b_ioend_wq; /* I/O completion wq */ xfs_buf_iodone_t b_iodone; /* I/O completion function */ struct completion b_iowait; /* queue for I/O waiters */ - void *b_log_item; + struct xfs_buf_log_item *b_log_item; struct list_head b_li_list; /* Log items list head */ struct xfs_trans *b_transp; struct page **b_pages; /* array of page pointers */ @@ -175,7 +159,6 @@ typedef struct xfs_buf { struct xfs_buf_map *b_maps; /* compound buffer map */ struct xfs_buf_map __b_map; /* inline compound buffer map */ int b_map_count; - int b_io_length; /* IO size in BBs */ atomic_t b_pin_count; /* pin count */ atomic_t b_io_remaining; /* #outstanding I/O requests */ unsigned int b_page_count; /* size of page array */ @@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, xfs_daddr_t blkno, size_t numblks, xfs_buf_flags_t flags); -struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target, - struct xfs_buf_map *map, int nmaps, - xfs_buf_flags_t flags); - -static inline struct xfs_buf * -xfs_buf_alloc( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) -{ - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return _xfs_buf_alloc(target, &map, 1, flags); -} - struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags); @@ -239,11 +207,10 @@ static inline struct xfs_buf * xfs_buf_get( struct xfs_buftarg *target, xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) + size_t numblks) { DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - return xfs_buf_get_map(target, &map, 1, flags); + return xfs_buf_get_map(target, &map, 1, 0); } static inline struct xfs_buf * @@ -269,9 +236,6 @@ xfs_buf_readahead( return xfs_buf_readahead_map(target, &map, 1, ops); } -void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks); -int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length); - struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags); int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, @@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp) return __xfs_buf_submit(bp, wait); } -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, - xfs_buf_rw_t); -#define xfs_buf_zero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) +void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize); /* Buffer Utility Routines */ extern void *xfs_buf_offset(struct xfs_buf *, size_t); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 65b32acfa0f6..7dcaec54a20b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -5,19 +5,17 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_log.h" -#include "xfs_inode.h" kmem_zone_t *xfs_buf_item_zone; @@ -520,7 +518,7 @@ xfs_buf_item_push( /* has a previous flush failed due to IO errors? */ if ((bp->b_flags & XBF_WRITE_FAIL) && ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { - xfs_warn(bp->b_target->bt_mount, + xfs_warn(bp->b_mount, "Failing async write on buffer block 0x%llx. Retrying async write.", (long long)bp->b_bn); } @@ -594,7 +592,7 @@ xfs_buf_item_put( * free the item. */ STATIC void -xfs_buf_item_unlock( +xfs_buf_item_release( struct xfs_log_item *lip) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); @@ -609,7 +607,7 @@ xfs_buf_item_unlock( &lip->li_flags); #endif - trace_xfs_buf_item_unlock(bip); + trace_xfs_buf_item_release(bip); /* * The bli dirty state should match whether the blf has logged segments @@ -639,6 +637,14 @@ xfs_buf_item_unlock( xfs_buf_relse(bp); } +STATIC void +xfs_buf_item_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) +{ + return xfs_buf_item_release(lip); +} + /* * This is called to find out where the oldest active copy of the * buf log item in the on disk log resides now that the last log @@ -671,25 +677,15 @@ xfs_buf_item_committed( return lsn; } -STATIC void -xfs_buf_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_buf_item_ops = { .iop_size = xfs_buf_item_size, .iop_format = xfs_buf_item_format, .iop_pin = xfs_buf_item_pin, .iop_unpin = xfs_buf_item_unpin, - .iop_unlock = xfs_buf_item_unlock, + .iop_release = xfs_buf_item_release, + .iop_committing = xfs_buf_item_committing, .iop_committed = xfs_buf_item_committed, .iop_push = xfs_buf_item_push, - .iop_committing = xfs_buf_item_committing }; STATIC int @@ -743,7 +739,7 @@ xfs_buf_item_init( * this buffer. If we do already have one, there is * nothing to do here so return. */ - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); if (bip) { ASSERT(bip->bli_item.li_type == XFS_LI_BUF); ASSERT(!bp->b_transp); @@ -980,9 +976,9 @@ xfs_buf_item_relse( */ void xfs_buf_attach_iodone( - xfs_buf_t *bp, - void (*cb)(xfs_buf_t *, xfs_log_item_t *), - xfs_log_item_t *lip) + struct xfs_buf *bp, + void (*cb)(struct xfs_buf *, struct xfs_log_item *), + struct xfs_log_item *lip) { ASSERT(xfs_buf_islocked(bp)); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 90f65f891fab..4a054b11011a 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -39,7 +39,7 @@ struct xfs_buf_log_item; * locked, and which 128 byte chunks of the buffer are dirty. */ struct xfs_buf_log_item { - xfs_log_item_t bli_item; /* common item structure */ + struct xfs_log_item bli_item; /* common item structure */ struct xfs_buf *bli_buf; /* real buffer pointer */ unsigned int bli_flags; /* misc flags */ unsigned int bli_recur; /* lock recursion count */ @@ -55,8 +55,8 @@ bool xfs_buf_item_put(struct xfs_buf_log_item *); void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); void xfs_buf_attach_iodone(struct xfs_buf *, - void(*)(struct xfs_buf *, xfs_log_item_t *), - xfs_log_item_t *); + void(*)(struct xfs_buf *, struct xfs_log_item *), + struct xfs_log_item *); void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 5142e64e2345..283df898dd9f 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -6,17 +6,14 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_bmap.h" #include "xfs_trans.h" diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index d0df0ed50f4b..8ec7aab89044 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -4,19 +4,17 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_extent_busy.h" -#include "xfs_discard.h" #include "xfs_trace.h" #include "xfs_log.h" diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index a1af984e4913..fb1ad4483081 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -14,16 +14,12 @@ #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_alloc.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_space.h" #include "xfs_trans_priv.h" #include "xfs_qm.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_bmap_btree.h" @@ -1243,7 +1239,7 @@ xfs_qm_exit(void) /* * Iterate every dquot of a particular type. The caller must ensure that the * particular quota type is active. iter_fn can return negative error codes, - * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating. + * or XFS_ITER_ABORT to indicate that it wants to stop iterating. */ int xfs_qm_dqiterate( diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 64bd8640f6e8..4fe85709d55d 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -34,7 +34,6 @@ typedef struct xfs_dquot { uint dq_flags; /* various flags (XFS_DQ_*) */ struct list_head q_lru; /* global free list of dquots */ struct xfs_mount*q_mount; /* filesystem this relates to */ - struct xfs_trans*q_transp; /* trans this belongs to currently */ uint q_nrefs; /* # active refs from inodes */ xfs_daddr_t q_blkno; /* blkno of dquot buffer */ int q_bufoffset; /* off of dq in buffer (# dquots) */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 7dedd17c4813..282ec5af293e 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" @@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin( wake_up(&dqp->q_pinwait); } -STATIC xfs_lsn_t -xfs_qm_dquot_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - /* - * We always re-log the entire dquot when it becomes dirty, - * so, the latest copy _is_ the only one that matters. - */ - return lsn; -} - /* * This is called to wait for the given dquot to be unpinned. * Most of these pin/unpin routines are plagiarized from inode code. @@ -209,14 +197,8 @@ out_unlock: return rval; } -/* - * Unlock the dquot associated with the log item. - * Clear the fields of the dquot and dquot log item that - * are specific to the current transaction. If the - * hold flags is set, do not unlock the dquot. - */ STATIC void -xfs_qm_dquot_logitem_unlock( +xfs_qm_dquot_logitem_release( struct xfs_log_item *lip) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; @@ -224,11 +206,6 @@ xfs_qm_dquot_logitem_unlock( ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* - * Clear the transaction pointer in the dquot - */ - dqp->q_transp = NULL; - - /* * dquots are never 'held' from getting unlocked at the end of * a transaction. Their locking and unlocking is hidden inside the * transaction layer, within trans_commit. Hence, no LI_HOLD flag @@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock( xfs_dqunlock(dqp); } -/* - * this needs to stamp an lsn into the dquot, I think. - * rpc's that look at user dquot's would then have to - * push on the dependency recorded in the dquot - */ STATIC void xfs_qm_dquot_logitem_committing( struct xfs_log_item *lip, - xfs_lsn_t lsn) + xfs_lsn_t commit_lsn) { + return xfs_qm_dquot_logitem_release(lip); } -/* - * This is the ops vector for dquots - */ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_size = xfs_qm_dquot_logitem_size, .iop_format = xfs_qm_dquot_logitem_format, .iop_pin = xfs_qm_dquot_logitem_pin, .iop_unpin = xfs_qm_dquot_logitem_unpin, - .iop_unlock = xfs_qm_dquot_logitem_unlock, - .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_release = xfs_qm_dquot_logitem_release, + .iop_committing = xfs_qm_dquot_logitem_committing, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing, .iop_error = xfs_dquot_item_error }; @@ -320,26 +289,6 @@ xfs_qm_qoff_logitem_format( } /* - * Pinning has no meaning for an quotaoff item, so just return. - */ -STATIC void -xfs_qm_qoff_logitem_pin( - struct xfs_log_item *lip) -{ -} - -/* - * Since pinning has no meaning for an quotaoff item, unpinning does - * not either. - */ -STATIC void -xfs_qm_qoff_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -/* * There isn't much you can do to push a quotaoff item. It is simply * stuck waiting for the log to be flushed to disk. */ @@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push( return XFS_ITEM_LOCKED; } -/* - * Quotaoff items have no locking or pushing, so return failure - * so that the caller doesn't bother with us. - */ -STATIC void -xfs_qm_qoff_logitem_unlock( - struct xfs_log_item *lip) -{ -} - -/* - * The quotaoff-start-item is logged only once and cannot be moved in the log, - * so simply return the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - STATIC xfs_lsn_t xfs_qm_qoffend_logitem_committed( struct xfs_log_item *lip, @@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed( return (xfs_lsn_t)-1; } -/* - * XXX rcc - don't know quite what to do with this. I think we can - * just ignore it. The only time that isn't the case is if we allow - * the client to somehow see that quotas have been turned off in which - * we can't allow that to get back until the quotaoff hits the disk. - * So how would that happen? Also, do we need different routines for - * quotaoff start and quotaoff end? I suspect the answer is yes but - * to be sure, I need to look at the recovery code and see how quota off - * recovery is handled (do we roll forward or back or do something else). - * If we roll forwards or backwards, then we need two separate routines, - * one that does nothing and one that stamps in the lsn that matters - * (truly makes the quotaoff irrevocable). If we do something else, - * then maybe we don't need two. - */ -STATIC void -xfs_qm_qoff_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_unlock = xfs_qm_qoff_logitem_unlock, .iop_committed = xfs_qm_qoffend_logitem_committed, .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing }; -/* - * This is the ops vector shared by all quotaoff-start log items. - */ static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { .iop_size = xfs_qm_qoff_logitem_size, .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoff_logitem_committed, .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing }; /* diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index db9df710a308..1aed34ccdabc 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -12,13 +12,13 @@ struct xfs_mount; struct xfs_qoff_logitem; typedef struct xfs_dq_logitem { - xfs_log_item_t qli_item; /* common portion */ + struct xfs_log_item qli_item; /* common portion */ struct xfs_dquot *qli_dquot; /* dquot ptr */ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ } xfs_dq_logitem_t; typedef struct xfs_qoff_logitem { - xfs_log_item_t qql_item; /* common portion */ + struct xfs_log_item qql_item; /* common portion */ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ unsigned int qql_flags; } xfs_qoff_logitem_t; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index a1e177f66404..544c9482a0ef 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_fs.h" #include "xfs_log_format.h" @@ -353,7 +354,7 @@ xfs_buf_verifier_error( size_t bufsz, xfs_failaddr_t failaddr) { - struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_mount *mp = bp->b_mount; xfs_failaddr_t fa; int sz; diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index f2284ceb129f..f1372f9046e3 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -4,18 +4,16 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_export.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_log.h" #include "xfs_pnfs.h" diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 74ddf66f4cfe..86f6512d6864 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -9,14 +9,18 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" +#include "xfs_shared.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_extfree_item.h" #include "xfs_log.h" #include "xfs_btree.h" #include "xfs_rmap.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_trace.h" kmem_zone_t *xfs_efi_zone; @@ -107,15 +111,6 @@ xfs_efi_item_format( /* - * Pinning has no meaning for an efi item, so just return. - */ -STATIC void -xfs_efi_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an EFI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the EFI transaction has been successfully committed to make it @@ -133,71 +128,22 @@ xfs_efi_item_unpin( } /* - * Efi items have no locking or pushing. However, since EFIs are pulled from - * the AIL when their corresponding EFDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the EFI out of - * the AIL. - */ -STATIC uint -xfs_efi_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The EFI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an EFD isn't going to be * constructed and thus we free the EFI here directly. */ STATIC void -xfs_efi_item_unlock( +xfs_efi_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_efi_release(EFI_ITEM(lip)); -} - -/* - * The EFI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_efi_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The EFI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_efi_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ + xfs_efi_release(EFI_ITEM(lip)); } -/* - * This is the ops vector shared by all efi log items. - */ static const struct xfs_item_ops xfs_efi_item_ops = { .iop_size = xfs_efi_item_size, .iop_format = xfs_efi_item_format, - .iop_pin = xfs_efi_item_pin, .iop_unpin = xfs_efi_item_unpin, - .iop_unlock = xfs_efi_item_unlock, - .iop_committed = xfs_efi_item_committed, - .iop_push = xfs_efi_item_push, - .iop_committing = xfs_efi_item_committing + .iop_release = xfs_efi_item_release, }; @@ -349,136 +295,298 @@ xfs_efd_item_format( } /* - * Pinning has no meaning for an efd item, so just return. + * The EFD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the EFI and free the EFD. */ STATIC void -xfs_efd_item_pin( +xfs_efd_item_release( struct xfs_log_item *lip) { + struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + + xfs_efi_release(efdp->efd_efip); + xfs_efd_item_free(efdp); } +static const struct xfs_item_ops xfs_efd_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_efd_item_size, + .iop_format = xfs_efd_item_format, + .iop_release = xfs_efd_item_release, +}; + /* - * Since pinning has no meaning for an efd item, unpinning does - * not either. + * Allocate an "extent free done" log item that will hold nextents worth of + * extents. The caller must use all nextents extents, because we are not + * flexible about this at all. */ -STATIC void -xfs_efd_item_unpin( - struct xfs_log_item *lip, - int remove) +static struct xfs_efd_log_item * +xfs_trans_get_efd( + struct xfs_trans *tp, + struct xfs_efi_log_item *efip, + unsigned int nextents) { + struct xfs_efd_log_item *efdp; + + ASSERT(nextents > 0); + + if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { + efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) + + (nextents - 1) * sizeof(struct xfs_extent), + KM_SLEEP); + } else { + efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); + } + + xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD, + &xfs_efd_item_ops); + efdp->efd_efip = efip; + efdp->efd_format.efd_nextents = nextents; + efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; + + xfs_trans_add_item(tp, &efdp->efd_item); + return efdp; } /* - * There isn't much you can do to push on an efd item. It is simply stuck - * waiting for the log to be flushed to disk. + * Free an extent and log it to the EFD. Note that the transaction is marked + * dirty regardless of whether the extent free succeeds or fails to support the + * EFI/EFD lifecycle rules. */ -STATIC uint -xfs_efd_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_free_extent( + struct xfs_trans *tp, + struct xfs_efd_log_item *efdp, + xfs_fsblock_t start_block, + xfs_extlen_t ext_len, + const struct xfs_owner_info *oinfo, + bool skip_discard) { - return XFS_ITEM_PINNED; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_extent *extp; + uint next_extent; + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, + start_block); + int error; + + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + + error = __xfs_free_extent(tp, start_block, ext_len, + oinfo, XFS_AG_RESV_NONE, skip_discard); + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the EFI and frees the EFD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); + + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); + extp->ext_start = start_block; + extp->ext_len = ext_len; + efdp->efd_next_extent++; + + return error; } -/* - * The EFD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the EFI and free the EFD. - */ -STATIC void -xfs_efd_item_unlock( - struct xfs_log_item *lip) +/* Sort bmap items by AG. */ +static int +xfs_extent_free_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_extent_free_item *ra; + struct xfs_extent_free_item *rb; + + ra = container_of(a, struct xfs_extent_free_item, xefi_list); + rb = container_of(b, struct xfs_extent_free_item, xefi_list); + return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - + XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); +} - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_efi_release(efdp->efd_efip); - xfs_efd_item_free(efdp); - } +/* Get an EFI. */ +STATIC void * +xfs_extent_free_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_efi_log_item *efip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + efip = xfs_efi_init(tp->t_mountp, count); + ASSERT(efip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &efip->efi_item); + return efip; } -/* - * When the efd item is committed to disk, all we need to do is delete our - * reference to our partner efi item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from further - * referencing this item. - */ -STATIC xfs_lsn_t -xfs_efd_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Log a free extent to the intent item. */ +STATIC void +xfs_extent_free_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { - struct xfs_efd_log_item *efdp = EFD_ITEM(lip); + struct xfs_efi_log_item *efip = intent; + struct xfs_extent_free_item *free; + uint next_extent; + struct xfs_extent *extp; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); /* - * Drop the EFI reference regardless of whether the EFD has been - * aborted. Once the EFD transaction is constructed, it is the sole - * responsibility of the EFD to release the EFI (even if the EFI is - * aborted due to log I/O error). + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. */ - xfs_efi_release(efdp->efd_efip); - xfs_efd_item_free(efdp); + next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; + ASSERT(next_extent < efip->efi_format.efi_nextents); + extp = &efip->efi_format.efi_extents[next_extent]; + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; +} - return (xfs_lsn_t)-1; +/* Get an EFD so we can process all the free extents. */ +STATIC void * +xfs_extent_free_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_efd(tp, intent, count); } -/* - * The EFD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Process a free extent. */ +STATIC int +xfs_extent_free_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_extent_free_item *free; + int error; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + error = xfs_trans_free_extent(tp, done_item, + free->xefi_startblock, + free->xefi_blockcount, + &free->xefi_oinfo, free->xefi_skip_discard); + kmem_free(free); + return error; +} + +/* Abort all pending EFIs. */ STATIC void -xfs_efd_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_extent_free_abort_intent( + void *intent) { + xfs_efi_release(intent); } -/* - * This is the ops vector shared by all efd log items. - */ -static const struct xfs_item_ops xfs_efd_item_ops = { - .iop_size = xfs_efd_item_size, - .iop_format = xfs_efd_item_format, - .iop_pin = xfs_efd_item_pin, - .iop_unpin = xfs_efd_item_unpin, - .iop_unlock = xfs_efd_item_unlock, - .iop_committed = xfs_efd_item_committed, - .iop_push = xfs_efd_item_push, - .iop_committing = xfs_efd_item_committing +/* Cancel a free extent. */ +STATIC void +xfs_extent_free_cancel_item( + struct list_head *item) +{ + struct xfs_extent_free_item *free; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + kmem_free(free); +} + +const struct xfs_defer_op_type xfs_extent_free_defer_type = { + .max_items = XFS_EFI_MAX_FAST_EXTENTS, + .diff_items = xfs_extent_free_diff_items, + .create_intent = xfs_extent_free_create_intent, + .abort_intent = xfs_extent_free_abort_intent, + .log_item = xfs_extent_free_log_item, + .create_done = xfs_extent_free_create_done, + .finish_item = xfs_extent_free_finish_item, + .cancel_item = xfs_extent_free_cancel_item, }; /* - * Allocate and initialize an efd item with the given number of extents. + * AGFL blocks are accounted differently in the reserve pools and are not + * inserted into the busy extent list. */ -struct xfs_efd_log_item * -xfs_efd_init( - struct xfs_mount *mp, - struct xfs_efi_log_item *efip, - uint nextents) - +STATIC int +xfs_agfl_free_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) { - struct xfs_efd_log_item *efdp; - uint size; + struct xfs_mount *mp = tp->t_mountp; + struct xfs_efd_log_item *efdp = done_item; + struct xfs_extent_free_item *free; + struct xfs_extent *extp; + struct xfs_buf *agbp; + int error; + xfs_agnumber_t agno; + xfs_agblock_t agbno; + uint next_extent; + + free = container_of(item, struct xfs_extent_free_item, xefi_list); + ASSERT(free->xefi_blockcount == 1); + agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); + + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); + + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + if (!error) + error = xfs_free_agfl_block(tp, agno, agbno, agbp, + &free->xefi_oinfo); - ASSERT(nextents > 0); - if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { - size = (uint)(sizeof(xfs_efd_log_item_t) + - ((nextents - 1) * sizeof(xfs_extent_t))); - efdp = kmem_zalloc(size, KM_SLEEP); - } else { - efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); - } + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the EFI and frees the EFD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); - efdp->efd_efip = efip; - efdp->efd_format.efd_nextents = nextents; - efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; + next_extent = efdp->efd_next_extent; + ASSERT(next_extent < efdp->efd_format.efd_nextents); + extp = &(efdp->efd_format.efd_extents[next_extent]); + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; + efdp->efd_next_extent++; - return efdp; + kmem_free(free); + return error; } +/* sub-type with special handling for AGFL deferred frees */ +const struct xfs_defer_op_type xfs_agfl_free_defer_type = { + .max_items = XFS_EFI_MAX_FAST_EXTENTS, + .diff_items = xfs_extent_free_diff_items, + .create_intent = xfs_extent_free_create_intent, + .abort_intent = xfs_extent_free_abort_intent, + .log_item = xfs_extent_free_log_item, + .create_done = xfs_extent_free_create_done, + .finish_item = xfs_agfl_free_finish_item, + .cancel_item = xfs_extent_free_cancel_item, +}; + /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h index 2a6a895ca73e..16aaab06d4ec 100644 --- a/fs/xfs/xfs_extfree_item.h +++ b/fs/xfs/xfs_extfree_item.h @@ -51,7 +51,7 @@ struct kmem_zone; * AIL, so at this point both the EFI and EFD are freed. */ typedef struct xfs_efi_log_item { - xfs_log_item_t efi_item; + struct xfs_log_item efi_item; atomic_t efi_refcount; atomic_t efi_next_extent; unsigned long efi_flags; /* misc flags */ @@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item { * have been freed. */ typedef struct xfs_efd_log_item { - xfs_log_item_t efd_item; + struct xfs_log_item efd_item; xfs_efi_log_item_t *efd_efip; uint efd_next_extent; xfs_efd_log_format_t efd_format; @@ -79,8 +79,6 @@ extern struct kmem_zone *xfs_efi_zone; extern struct kmem_zone *xfs_efd_zone; xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); -xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *, - uint); int xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt); void xfs_efi_item_free(xfs_efi_log_item_t *); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 916a35cae5e9..e93bacbd49ae 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -10,14 +10,11 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" @@ -28,9 +25,7 @@ #include "xfs_iomap.h" #include "xfs_reflink.h" -#include <linux/dcache.h> #include <linux/falloc.h> -#include <linux/pagevec.h> #include <linux/backing-dev.h> #include <linux/mman.h> @@ -379,6 +374,7 @@ xfs_dio_write_end_io( struct inode *inode = file_inode(iocb->ki_filp); struct xfs_inode *ip = XFS_I(inode); loff_t offset = iocb->ki_pos; + unsigned int nofs_flag; int error = 0; trace_xfs_end_io_direct_write(ip, offset, size); @@ -395,10 +391,17 @@ xfs_dio_write_end_io( */ XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + if (flags & IOMAP_DIO_COW) { error = xfs_reflink_end_cow(ip, offset, size); if (error) - return error; + goto out; } /* @@ -407,8 +410,10 @@ xfs_dio_write_end_io( * earlier allows a racing dio read to find unwritten extents before * they are converted. */ - if (flags & IOMAP_DIO_UNWRITTEN) - return xfs_iomap_write_unwritten(ip, offset, size, true); + if (flags & IOMAP_DIO_UNWRITTEN) { + error = xfs_iomap_write_unwritten(ip, offset, size, true); + goto out; + } /* * We need to update the in-core inode size here so that we don't end up @@ -430,6 +435,8 @@ xfs_dio_write_end_io( spin_unlock(&ip->i_flags_lock); } +out: + memalloc_nofs_restore(nofs_flag); return error; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 182501373af2..574a7a8b4736 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -5,22 +5,19 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_alloc.h" #include "xfs_mru_cache.h" -#include "xfs_filestream.h" #include "xfs_trace.h" #include "xfs_ag_resv.h" #include "xfs_trans.h" -#include "xfs_shared.h" struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 3d76a9e35870..5a8f9641562a 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -9,16 +9,12 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_error.h" #include "xfs_btree.h" #include "xfs_rmap_btree.h" #include "xfs_trace.h" -#include "xfs_log.h" #include "xfs_rmap.h" #include "xfs_alloc.h" #include "xfs_bit.h" diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3d0e0570e3aa..3e61d0cc23f8 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -11,15 +11,11 @@ #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_error.h" -#include "xfs_btree.h" #include "xfs_alloc.h" #include "xfs_fsops.h" #include "xfs_trans_space.h" -#include "xfs_rtalloc.h" -#include "xfs_trace.h" #include "xfs_log.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" @@ -251,9 +247,9 @@ xfs_growfs_data( if (mp->m_sb.sb_imax_pct) { uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; do_div(icount, 100); - mp->m_maxicount = XFS_FSB_TO_INO(mp, icount); + M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount); } else - mp->m_maxicount = 0; + M_IGEO(mp)->maxicount = 0; /* Update secondary superblocks now the physical grow has completed */ error = xfs_update_secondary_sbs(mp); diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index d0d377384120..fa55ab8b8d80 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ #include "xfs.h" -#include "xfs_sysctl.h" /* * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, @@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = { #else .bug_on_assert = false, /* assert failures WARN() */ #endif +#ifdef DEBUG + .pwork_threads = -1, /* automatic thread detection */ +#endif }; diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 4c4929f9e7bf..8e0cb05a7142 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -9,12 +9,8 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trace.h" #include "xfs_health.h" @@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = { void xfs_bulkstat_health( struct xfs_inode *ip, - struct xfs_bstat *bs) + struct xfs_bulkstat *bs) { const struct ioctl_sick_map *m; unsigned int sick; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index a76b27565a18..0b0fd10a36d4 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" @@ -23,8 +23,6 @@ #include "xfs_dquot.h" #include "xfs_reflink.h" -#include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/iversion.h> /* diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 8381d34cb102..d99a0a3e5f40 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -6,14 +6,9 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" -#include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_mount.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_icreate_item.h" #include "xfs_log.h" @@ -56,80 +51,18 @@ xfs_icreate_item_format( sizeof(struct xfs_icreate_log)); } - -/* Pinning has no meaning for the create item, so just return. */ STATIC void -xfs_icreate_item_pin( +xfs_icreate_item_release( struct xfs_log_item *lip) { + kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip)); } - -/* pinning has no meaning for the create item, so just return. */ -STATIC void -xfs_icreate_item_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -STATIC void -xfs_icreate_item_unlock( - struct xfs_log_item *lip) -{ - struct xfs_icreate_item *icp = ICR_ITEM(lip); - - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - kmem_zone_free(xfs_icreate_zone, icp); - return; -} - -/* - * Because we have ordered buffers being tracked in the AIL for the inode - * creation, we don't need the create item after this. Hence we can free - * the log item and return -1 to tell the caller we're done with the item. - */ -STATIC xfs_lsn_t -xfs_icreate_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_icreate_item *icp = ICR_ITEM(lip); - - kmem_zone_free(xfs_icreate_zone, icp); - return (xfs_lsn_t)-1; -} - -/* item can never get into the AIL */ -STATIC uint -xfs_icreate_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - ASSERT(0); - return XFS_ITEM_SUCCESS; -} - -/* Ordered buffers do the dependency tracking here, so this does nothing. */ -STATIC void -xfs_icreate_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_icreate_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .iop_size = xfs_icreate_item_size, .iop_format = xfs_icreate_item_format, - .iop_pin = xfs_icreate_item_pin, - .iop_unpin = xfs_icreate_item_unpin, - .iop_push = xfs_icreate_item_push, - .iop_unlock = xfs_icreate_item_unlock, - .iop_committed = xfs_icreate_item_committed, - .iop_committing = xfs_icreate_item_committing, + .iop_release = xfs_icreate_item_release, }; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 71d216cf6f87..6467d5e1df2d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3,7 +3,6 @@ * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/log2.h> #include <linux/iversion.h> #include "xfs.h" @@ -16,10 +15,7 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" -#include "xfs_attr_sf.h" #include "xfs_attr.h" #include "xfs_trans_space.h" #include "xfs_trans.h" @@ -32,7 +28,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_filestream.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" @@ -40,7 +35,6 @@ #include "xfs_log.h" #include "xfs_bmap_btree.h" #include "xfs_reflink.h" -#include "xfs_dir2_priv.h" kmem_zone_t *xfs_inode_zone; @@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass) */ static void xfs_lock_inodes( - xfs_inode_t **ips, - int inodes, - uint lock_mode) + struct xfs_inode **ips, + int inodes, + uint lock_mode) { - int attempts = 0, i, j, try_lock; - xfs_log_item_t *lp; + int attempts = 0, i, j, try_lock; + struct xfs_log_item *lp; /* * Currently supports between 2 and 5 inodes with exclusive locking. We @@ -485,7 +479,7 @@ again: */ if (!try_lock) { for (j = (i - 1); j >= 0 && !try_lock; j--) { - lp = (xfs_log_item_t *)ips[j]->i_itemp; + lp = &ips[j]->i_itemp->ili_item; if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) try_lock++; } @@ -551,7 +545,7 @@ xfs_lock_two_inodes( struct xfs_inode *temp; uint mode_temp; int attempts = 0; - xfs_log_item_t *lp; + struct xfs_log_item *lp; ASSERT(hweight32(ip0_mode) == 1); ASSERT(hweight32(ip1_mode) == 1); @@ -585,7 +579,7 @@ xfs_lock_two_inodes( * the second lock. If we can't get it, we must release the first one * and try again. */ - lp = (xfs_log_item_t *)ip0->i_itemp; + lp = &ip0->i_itemp->ili_item; if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { xfs_iunlock(ip0, ip0_mode); @@ -2537,13 +2531,14 @@ xfs_ifree_cluster( xfs_inode_log_item_t *iip; struct xfs_log_item *lip; struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_ino_t inum; inum = xic->first_ino; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); - nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster; + nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster; - for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) { + for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) { /* * The allocation bitmap tells us which inodes of the chunk were * physically allocated. Skip the cluster if an inode falls into @@ -2551,7 +2546,7 @@ xfs_ifree_cluster( */ ioffset = inum - xic->first_ino; if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { - ASSERT(ioffset % mp->m_inodes_per_cluster == 0); + ASSERT(ioffset % igeo->inodes_per_cluster == 0); continue; } @@ -2567,7 +2562,7 @@ xfs_ifree_cluster( * to mark all the active inodes on the buffer stale. */ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, - mp->m_bsize * mp->m_blocks_per_cluster, + mp->m_bsize * igeo->blocks_per_cluster, XBF_UNMAPPED); if (!bp) @@ -2614,7 +2609,7 @@ xfs_ifree_cluster( * transaction stale above, which means there is no point in * even trying to lock them. */ - for (i = 0; i < mp->m_inodes_per_cluster; i++) { + for (i = 0; i < igeo->inodes_per_cluster; i++) { retry: rcu_read_lock(); ip = radix_tree_lookup(&pag->pag_ici_root, @@ -3472,28 +3467,27 @@ xfs_iflush_cluster( struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; unsigned long first_index, mask; - unsigned long inodes_per_cluster; int cilist_size; struct xfs_inode **cilist; struct xfs_inode *cip; + struct xfs_ino_geometry *igeo = M_IGEO(mp); int nr_found; int clcount = 0; int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; - cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *); + cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *); cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); if (!cilist) goto out_put; - mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); + mask = ~(igeo->inodes_per_cluster - 1); first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; rcu_read_lock(); /* really need a gang lookup range call here */ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, - first_index, inodes_per_cluster); + first_index, igeo->inodes_per_cluster); if (nr_found == 0) goto out_free; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fa1c4fe2ffbf..c9a502eed204 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -5,6 +5,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -12,7 +13,6 @@ #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_inode_item.h" -#include "xfs_error.h" #include "xfs_trace.h" #include "xfs_trans_priv.h" #include "xfs_buf_item.h" @@ -565,7 +565,7 @@ out_unlock: * Unlock the inode associated with the inode log item. */ STATIC void -xfs_inode_item_unlock( +xfs_inode_item_release( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); @@ -621,23 +621,21 @@ xfs_inode_item_committed( STATIC void xfs_inode_item_committing( struct xfs_log_item *lip, - xfs_lsn_t lsn) + xfs_lsn_t commit_lsn) { - INODE_ITEM(lip)->ili_last_lsn = lsn; + INODE_ITEM(lip)->ili_last_lsn = commit_lsn; + return xfs_inode_item_release(lip); } -/* - * This is the ops vector shared by all buf log items. - */ static const struct xfs_item_ops xfs_inode_item_ops = { .iop_size = xfs_inode_item_size, .iop_format = xfs_inode_item_format, .iop_pin = xfs_inode_item_pin, .iop_unpin = xfs_inode_item_unpin, - .iop_unlock = xfs_inode_item_unlock, + .iop_release = xfs_inode_item_release, .iop_committed = xfs_inode_item_committed, .iop_push = xfs_inode_item_push, - .iop_committing = xfs_inode_item_committing, + .iop_committing = xfs_inode_item_committing, .iop_error = xfs_inode_item_error }; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 27081eba220c..07a60e74c39c 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -14,7 +14,7 @@ struct xfs_inode; struct xfs_mount; typedef struct xfs_inode_log_item { - xfs_log_item_t ili_item; /* common portion */ + struct xfs_log_item ili_item; /* common portion */ struct xfs_inode *ili_inode; /* inode ptr */ xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d7dfc13f30f5..6f7848cd5527 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -11,9 +11,8 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_ioctl.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" #include "xfs_attr.h" @@ -25,7 +24,6 @@ #include "xfs_export.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_symlink.h" #include "xfs_trans.h" #include "xfs_acl.h" #include "xfs_btree.h" @@ -36,14 +34,8 @@ #include "xfs_ag.h" #include "xfs_health.h" -#include <linux/capability.h> -#include <linux/cred.h> -#include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/pagemap.h> -#include <linux/slab.h> -#include <linux/exportfs.h> /* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to @@ -721,16 +713,45 @@ out_unlock: return error; } +/* Return 0 on success or positive error */ +int +xfs_fsbulkstat_one_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + struct xfs_bstat bs1; + + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); + if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat)); +} + +int +xfs_fsinumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) +{ + struct xfs_inogrp ig1; + + xfs_inumbers_to_inogrp(&ig1, igrp); + if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp)); +} + STATIC int -xfs_ioc_bulkstat( +xfs_ioc_fsbulkstat( xfs_mount_t *mp, unsigned int cmd, void __user *arg) { - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + .ocount = 0, + }; + xfs_ino_t lastino; int error; /* done = 1 if there are more stats to get and if bulkstat */ @@ -742,41 +763,243 @@ xfs_ioc_bulkstat( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq))) return -EFAULT; - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, - sizeof(xfs_bstat_t), NULL, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request back end functions + * take the inode to start with, so we have to compute the startino + * parameter from lastino to maintain correct function. lastino == 0 + * is a special case because it has traditionally meant "first inode + * in filesystem". + */ + if (cmd == XFS_IOC_FSINUMBERS) { + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_inumbers(&breq, xfs_fsinumbers_fmt); + lastino = breq.startino - 1; + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) { + breq.startino = lastino; + breq.icount = 1; + error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt); + } else { /* XFS_IOC_FSBULKSTAT */ + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt); + lastino = breq.startino - 1; + } if (error) return error; - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -EFAULT; + if (bulkreq.lastip != NULL && + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) + return -EFAULT; - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -EFAULT; + if (bulkreq.ocount != NULL && + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) + return -EFAULT; + + return 0; +} + +/* Return 0 on success or positive error */ +static int +xfs_bulkstat_fmt( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) +{ + if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat)); +} + +/* + * Check the incoming bulk request @hdr from userspace and initialize the + * internal @breq bulk request appropriately. Returns 0 if the bulk request + * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual + * negative error code. + */ +static int +xfs_bulk_ireq_setup( + struct xfs_mount *mp, + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq, + void __user *ubuffer) +{ + if (hdr->icount == 0 || + (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) || + memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) + return -EINVAL; + + breq->startino = hdr->ino; + breq->ubuffer = ubuffer; + breq->icount = hdr->icount; + breq->ocount = 0; + breq->flags = 0; + + /* + * The @ino parameter is a special value, so we must look it up here. + * We're not allowed to have IREQ_AGNO, and we only return one inode + * worth of data. + */ + if (hdr->flags & XFS_BULK_IREQ_SPECIAL) { + if (hdr->flags & XFS_BULK_IREQ_AGNO) + return -EINVAL; + + switch (hdr->ino) { + case XFS_BULK_IREQ_SPECIAL_ROOT: + hdr->ino = mp->m_sb.sb_rootino; + break; + default: + return -EINVAL; + } + breq->icount = 1; } + /* + * The IREQ_AGNO flag means that we only want results from a given AG. + * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is + * beyond the specified AG then we return no results. + */ + if (hdr->flags & XFS_BULK_IREQ_AGNO) { + if (hdr->agno >= mp->m_sb.sb_agcount) + return -EINVAL; + + if (breq->startino == 0) + breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0); + else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno) + return -EINVAL; + + breq->flags |= XFS_IBULK_SAME_AG; + + /* Asking for an inode past the end of the AG? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno) + return XFS_ITER_ABORT; + } else if (hdr->agno) + return -EINVAL; + + /* Asking for an inode past the end of the FS? We're done! */ + if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount) + return XFS_ITER_ABORT; + + return 0; +} + +/* + * Update the userspace bulk request @hdr to reflect the end state of the + * internal bulk request @breq. + */ +static void +xfs_bulk_ireq_teardown( + struct xfs_bulk_ireq *hdr, + struct xfs_ibulk *breq) +{ + hdr->ino = breq->startino; + hdr->ocount = breq->ocount; +} + +/* Handle the v5 bulkstat ioctl. */ +STATIC int +xfs_ioc_bulkstat( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_bulkstat_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat); + if (error == XFS_ITER_ABORT) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_bulkstat(&breq, xfs_bulkstat_fmt); + if (error) + return error; + +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) + return -EFAULT; + + return 0; +} + +STATIC int +xfs_inumbers_fmt( + struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp) +{ + if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers))) + return -EFAULT; + return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers)); +} + +/* Handle the v5 inumbers ioctl. */ +STATIC int +xfs_ioc_inumbers( + struct xfs_mount *mp, + unsigned int cmd, + struct xfs_inumbers_req __user *arg) +{ + struct xfs_bulk_ireq hdr; + struct xfs_ibulk breq = { + .mp = mp, + }; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) + return -EFAULT; + + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); + if (error == XFS_ITER_ABORT) + goto out_teardown; + if (error < 0) + return error; + + error = xfs_inumbers(&breq, xfs_inumbers_fmt); + if (error) + return error; + +out_teardown: + xfs_bulk_ireq_teardown(&hdr, &breq); + if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr))) + return -EFAULT; + return 0; } @@ -879,37 +1102,44 @@ xfs_di2lxflags( return flags; } -STATIC int -xfs_ioc_fsgetxattr( - xfs_inode_t *ip, - int attr, - void __user *arg) +static void +xfs_fill_fsxattr( + struct xfs_inode *ip, + bool attr, + struct fsxattr *fa) { - struct fsxattr fa; - - memset(&fa, 0, sizeof(struct fsxattr)); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - fa.fsx_xflags = xfs_ip2xflags(ip); - fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_cowextsize = ip->i_d.di_cowextsize << + simple_fill_fsxattr(fa, xfs_ip2xflags(ip)); + fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa->fsx_cowextsize = ip->i_d.di_cowextsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = xfs_get_projid(ip); + fa->fsx_projid = xfs_get_projid(ip); if (attr) { if (ip->i_afp) { if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(ip->i_afp); + fa->fsx_nextents = xfs_iext_count(ip->i_afp); else - fa.fsx_nextents = ip->i_d.di_anextents; + fa->fsx_nextents = ip->i_d.di_anextents; } else - fa.fsx_nextents = 0; + fa->fsx_nextents = 0; } else { if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = xfs_iext_count(&ip->i_df); + fa->fsx_nextents = xfs_iext_count(&ip->i_df); else - fa.fsx_nextents = ip->i_d.di_nextents; + fa->fsx_nextents = ip->i_d.di_nextents; } +} + +STATIC int +xfs_ioc_fsgetxattr( + xfs_inode_t *ip, + int attr, + void __user *arg) +{ + struct fsxattr fa; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_fill_fsxattr(ip, attr, &fa); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (copy_to_user(arg, &fa, sizeof(fa))) @@ -1035,15 +1265,6 @@ xfs_ioctl_setattr_xflags( if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip)) return -EINVAL; - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if (((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE | XFS_DIFLAG_APPEND)) || - (fa->fsx_xflags & (FS_XFLAG_IMMUTABLE | FS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) - return -EPERM; - /* diflags2 only valid for v3 inodes. */ di_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags); if (di_flags2 && ip->i_d.di_version < 3) @@ -1202,39 +1423,31 @@ xfs_ioctl_setattr_check_extsize( struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; - - if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode)) - return -EINVAL; - - if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) && - !S_ISDIR(VFS_I(ip)->i_mode)) - return -EINVAL; + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) return -EINVAL; - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) - return -EINVAL; + if (fa->fsx_extsize == 0) + return 0; - if (XFS_IS_REALTIME_INODE(ip) || - (fa->fsx_xflags & FS_XFLAG_REALTIME)) { - size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; - } + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) + return -EINVAL; - if (fa->fsx_extsize % size) + if (XFS_IS_REALTIME_INODE(ip) || + (fa->fsx_xflags & FS_XFLAG_REALTIME)) { + size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) return -EINVAL; - } else - fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE | FS_XFLAG_EXTSZINHERIT); + } + + if (fa->fsx_extsize % size) + return -EINVAL; return 0; } @@ -1260,6 +1473,8 @@ xfs_ioctl_setattr_check_cowextsize( struct fsxattr *fa) { struct xfs_mount *mp = ip->i_mount; + xfs_extlen_t size; + xfs_fsblock_t cowextsize_fsb; if (!(fa->fsx_xflags & FS_XFLAG_COWEXTSIZE)) return 0; @@ -1268,25 +1483,19 @@ xfs_ioctl_setattr_check_cowextsize( ip->i_d.di_version != 3) return -EINVAL; - if (!S_ISREG(VFS_I(ip)->i_mode) && !S_ISDIR(VFS_I(ip)->i_mode)) - return -EINVAL; - - if (fa->fsx_cowextsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t cowextsize_fsb; + if (fa->fsx_cowextsize == 0) + return 0; - cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); - if (cowextsize_fsb > MAXEXTLEN) - return -EINVAL; + cowextsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_cowextsize); + if (cowextsize_fsb > MAXEXTLEN) + return -EINVAL; - size = mp->m_sb.sb_blocksize; - if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) - return -EINVAL; + size = mp->m_sb.sb_blocksize; + if (cowextsize_fsb > mp->m_sb.sb_agblocks / 2) + return -EINVAL; - if (fa->fsx_cowextsize % size) - return -EINVAL; - } else - fa->fsx_xflags &= ~FS_XFLAG_COWEXTSIZE; + if (fa->fsx_cowextsize % size) + return -EINVAL; return 0; } @@ -1300,21 +1509,6 @@ xfs_ioctl_setattr_check_projid( if (fa->fsx_projid > (uint16_t)-1 && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return -EINVAL; - - /* - * Project Quota ID state is only allowed to change from within the init - * namespace. Enforce that restriction only if we are trying to change - * the quota ID state. Everything else is allowed in user namespaces. - */ - if (current_user_ns() == &init_user_ns) - return 0; - - if (xfs_get_projid(ip) != fa->fsx_projid) - return -EINVAL; - if ((fa->fsx_xflags & FS_XFLAG_PROJINHERIT) != - (ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) - return -EINVAL; - return 0; } @@ -1323,6 +1517,7 @@ xfs_ioctl_setattr( xfs_inode_t *ip, struct fsxattr *fa) { + struct fsxattr old_fa; struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct xfs_dquot *udqp = NULL; @@ -1370,7 +1565,6 @@ xfs_ioctl_setattr( goto error_free_dquots; } - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && xfs_get_projid(ip) != fa->fsx_projid) { code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp, @@ -1379,6 +1573,11 @@ xfs_ioctl_setattr( goto error_trans_cancel; } + xfs_fill_fsxattr(ip, false, &old_fa); + code = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, fa); + if (code) + goto error_trans_cancel; + code = xfs_ioctl_setattr_check_extsize(ip, fa); if (code) goto error_trans_cancel; @@ -1489,6 +1688,7 @@ xfs_ioc_setxflags( { struct xfs_trans *tp; struct fsxattr fa; + struct fsxattr old_fa; unsigned int flags; int join_flags = 0; int error; @@ -1524,6 +1724,13 @@ xfs_ioc_setxflags( goto out_drop_write; } + xfs_fill_fsxattr(ip, false, &old_fa); + error = vfs_ioc_fssetxattr_check(VFS_I(ip), &old_fa, &fa); + if (error) { + xfs_trans_cancel(tp); + goto out_drop_write; + } + error = xfs_ioctl_setattr_xflags(tp, ip, &fa); if (error) { xfs_trans_cancel(tp); @@ -1942,7 +2149,12 @@ xfs_file_ioctl( case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: + return xfs_ioc_fsbulkstat(mp, cmd, arg); + + case XFS_IOC_BULKSTAT: return xfs_ioc_bulkstat(mp, cmd, arg); + case XFS_IOC_INUMBERS: + return xfs_ioc_inumbers(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry(mp, arg, 3); diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index 4b17f67c888a..654c0bb1bcf8 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -77,4 +77,12 @@ xfs_set_dmattrs( uint evmask, uint16_t state); +struct xfs_ibulk; +struct xfs_bstat; +struct xfs_inogrp; + +int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat); +int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp); + #endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 614fc6886d24..7fcf7569743f 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -3,23 +3,19 @@ * Copyright (c) 2004-2005 Silicon Graphics, Inc. * All Rights Reserved. */ -#include <linux/compat.h> -#include <linux/ioctl.h> #include <linux/mount.h> -#include <linux/slab.h> -#include <linux/uaccess.h> #include <linux/fsmap.h> #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" -#include "xfs_error.h" #include "xfs_fsops.h" -#include "xfs_alloc.h" #include "xfs_rtalloc.h" #include "xfs_attr.h" #include "xfs_ioctl.h" @@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin( } STATIC int -xfs_inumbers_fmt_compat( - void __user *ubuffer, - const struct xfs_inogrp *buffer, - long count, - long *written) +xfs_fsinumbers_fmt_compat( + struct xfs_ibulk *breq, + const struct xfs_inumbers *ig) { - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; + struct compat_xfs_inogrp __user *p32 = breq->ubuffer; + struct xfs_inogrp ig1; + struct xfs_inogrp *igrp = &ig1; - for (i = 0; i < count; i++) { - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -EFAULT; - } - *written = count * sizeof(*p32); - return 0; + xfs_inumbers_to_inogrp(&ig1, ig); + + if (put_user(igrp->xi_startino, &p32->xi_startino) || + put_user(igrp->xi_alloccount, &p32->xi_alloccount) || + put_user(igrp->xi_allocmask, &p32->xi_allocmask)) + return -EFAULT; + + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp)); } #else -#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt #endif /* BROKEN_X86_ALIGNMENT */ STATIC int @@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin( return 0; } -/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +/* + * struct xfs_bstat has differing alignment on intel, & bstime_t sizes + * everywhere + */ STATIC int xfs_ioctl32_bstat_copyin( - xfs_bstat_t *bstat, - compat_xfs_bstat_t __user *bstat32) + struct xfs_bstat *bstat, + struct compat_xfs_bstat __user *bstat32) { if (get_user(bstat->bs_ino, &bstat32->bs_ino) || get_user(bstat->bs_mode, &bstat32->bs_mode) || @@ -171,16 +169,15 @@ xfs_bstime_store_compat( /* Return 0 on success or positive error (to xfs_bulkstat()) */ STATIC int -xfs_bulkstat_one_fmt_compat( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) +xfs_fsbulkstat_one_fmt_compat( + struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat) { - compat_xfs_bstat_t __user *p32 = ubuffer; + struct compat_xfs_bstat __user *p32 = breq->ubuffer; + struct xfs_bstat bs1; + struct xfs_bstat *buffer = &bs1; - if (ubsize < sizeof(*p32)) - return -ENOMEM; + xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat); if (put_user(buffer->bs_ino, &p32->bs_ino) || put_user(buffer->bs_mode, &p32->bs_mode) || @@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat( put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) return -EFAULT; - if (ubused) - *ubused = sizeof(*p32); - return 0; -} -STATIC int -xfs_bulkstat_one_compat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ -{ - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, - ubused, stat); + return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat)); } /* copied from xfs_ioctl.c */ STATIC int -xfs_compat_ioc_bulkstat( +xfs_compat_ioc_fsbulkstat( xfs_mount_t *mp, unsigned int cmd, - compat_xfs_fsop_bulkreq_t __user *p32) + struct compat_xfs_fsop_bulkreq __user *p32) { u32 addr; - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; + struct xfs_fsop_bulkreq bulkreq; + struct xfs_ibulk breq = { + .mp = mp, + .ocount = 0, + }; + xfs_ino_t lastino; int error; /* @@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat( * to userpace memory via bulkreq.ubuffer. Normally the compat * functions and structure size are the correct ones to use ... */ - inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; - bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; - size_t bs_one_size = sizeof(struct compat_xfs_bstat); + inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; + bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; #ifdef CONFIG_X86_X32 if (in_x32_syscall()) { @@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat( * the data written out in compat layout will not match what * x32 userspace expects. */ - inumbers_func = xfs_inumbers_fmt; - bs_one_func = xfs_bulkstat_one; - bs_one_size = sizeof(struct xfs_bstat); + inumbers_func = xfs_fsinumbers_fmt; + bs_one_func = xfs_fsbulkstat_one_fmt; } #endif @@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat( return -EFAULT; bulkreq.ocount = compat_ptr(addr); - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64))) return -EFAULT; - if ((count = bulkreq.icount) <= 0) + if (bulkreq.icount <= 0) return -EINVAL; if (bulkreq.ubuffer == NULL) return -EINVAL; + breq.ubuffer = bulkreq.ubuffer; + breq.icount = bulkreq.icount; + + /* + * FSBULKSTAT_SINGLE expects that *lastip contains the inode number + * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect + * that *lastip contains either zero or the number of the last inode to + * be examined by the previous call and return results starting with + * the next inode after that. The new bulk request back end functions + * take the inode to start with, so we have to compute the startino + * parameter from lastino to maintain correct function. lastino == 0 + * is a special case because it has traditionally meant "first inode + * in filesystem". + */ if (cmd == XFS_IOC_FSINUMBERS_32) { - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, inumbers_func); + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_inumbers(&breq, inumbers_func); + lastino = breq.startino - 1; } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { - int res; - - error = bs_one_func(mp, inlast, bulkreq.ubuffer, - bs_one_size, NULL, &res); + breq.startino = lastino; + breq.icount = 1; + error = xfs_bulkstat_one(&breq, bs_one_func); + lastino = breq.startino; } else if (cmd == XFS_IOC_FSBULKSTAT_32) { - error = xfs_bulkstat(mp, &inlast, &count, - bs_one_func, bs_one_size, - bulkreq.ubuffer, &done); - } else + breq.startino = lastino ? lastino + 1 : 0; + error = xfs_bulkstat(&breq, bs_one_func); + lastino = breq.startino - 1; + } else { error = -EINVAL; + } if (error) return error; - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -EFAULT; + if (bulkreq.lastip != NULL && + copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t))) + return -EFAULT; - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -EFAULT; - } + if (bulkreq.ocount != NULL && + copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32))) + return -EFAULT; return 0; } @@ -577,6 +574,8 @@ xfs_file_compat_ioctl( case XFS_IOC_ERROR_CLEARALL: case FS_IOC_GETFSMAP: case XFS_IOC_SCRUB_METADATA: + case XFS_IOC_BULKSTAT: + case XFS_IOC_INUMBERS: return xfs_file_ioctl(filp, cmd, p); #if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) /* @@ -674,7 +673,7 @@ xfs_file_compat_ioctl( case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - return xfs_compat_ioc_bulkstat(mp, cmd, arg); + return xfs_compat_ioc_fsbulkstat(mp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: case XFS_IOC_PATH_TO_FSHANDLE_32: { diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index d28fa824284a..7985344d3aa6 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime { __s32 tv_nsec; /* and nanoseconds */ } compat_xfs_bstime_t; -typedef struct compat_xfs_bstat { +struct compat_xfs_bstat { __u64 bs_ino; /* inode number */ __u16 bs_mode; /* type and mode */ __u16 bs_nlink; /* number of links */ @@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat { __u32 bs_dmevmask; /* DMIG event mask */ __u16 bs_dmstate; /* DMIG state info */ __u16 bs_aextents; /* attribute number of extents */ -} __compat_packed compat_xfs_bstat_t; +} __compat_packed; -typedef struct compat_xfs_fsop_bulkreq { +struct compat_xfs_fsop_bulkreq { compat_uptr_t lastip; /* last inode # pointer */ __s32 icount; /* count of entries in buffer */ compat_uptr_t ubuffer; /* user buffer for inode desc. */ compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; +}; #define XFS_IOC_FSBULKSTAT_32 \ _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) @@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext { xfs_off_t sx_offset; /* offset into file */ xfs_off_t sx_length; /* leng from offset */ char sx_pad[16]; /* pad space, unused */ - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ + struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */ } __compat_packed compat_xfs_swapext_t; #define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) @@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 { #define XFS_IOC_FSGEOMETRY_V1_32 \ _IOR('X', 100, struct compat_xfs_fsop_geom_v1) -typedef struct compat_xfs_inogrp { +struct compat_xfs_inogrp { __u64 xi_startino; /* starting inode number */ __s32 xi_alloccount; /* # bits set in allocmask */ __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; +} __attribute__((packed)); /* These growfs input structures have padding on the end, so must translate */ typedef struct compat_xfs_growfs_data { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 63d323916bba..3a4310d7cb59 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -4,7 +4,6 @@ * Copyright (c) 2016-2018 Christoph Hellwig. * All Rights Reserved. */ -#include <linux/iomap.h> #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" @@ -12,7 +11,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap_btree.h" @@ -25,7 +23,6 @@ #include "xfs_inode_item.h" #include "xfs_iomap.h" #include "xfs_trace.h" -#include "xfs_icache.h" #include "xfs_quota.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" @@ -779,7 +776,7 @@ xfs_iomap_write_unwritten( * complete here and might deadlock on the iolock. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 74047bd0c1ae..ff3c1fae5357 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -10,30 +10,20 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_acl.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_attr.h" #include "xfs_trans.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_symlink.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" -#include "xfs_trans_space.h" #include "xfs_iomap.h" -#include "xfs_defer.h" -#include <linux/capability.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/security.h> -#include <linux/iomap.h> -#include <linux/slab.h> #include <linux/iversion.h> /* diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 1e1a0af1dd34..a8a06bb78ea8 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -14,46 +14,66 @@ #include "xfs_btree.h" #include "xfs_ialloc.h" #include "xfs_ialloc_btree.h" +#include "xfs_iwalk.h" #include "xfs_itable.h" #include "xfs_error.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_health.h" /* - * Return stat information for one inode. - * Return 0 if ok, else errno. + * Bulk Stat + * ========= + * + * Use the inode walking functions to fill out struct xfs_bulkstat for every + * allocated inode, then pass the stat information to some externally provided + * iteration function. */ -int + +struct xfs_bstat_chunk { + bulkstat_one_fmt_pf formatter; + struct xfs_ibulk *breq; + struct xfs_bulkstat *buf; +}; + +/* + * Fill out the bulkstat info for a single inode and report it somewhere. + * + * bc->breq->lastino is effectively the inode cursor as we walk through the + * filesystem. Therefore, we update it any time we need to move the cursor + * forward, regardless of whether or not we're sending any bstat information + * back to userspace. If the inode is internal metadata or, has been freed + * out from under us, we just simply keep going. + * + * However, if any other type of error happens we want to stop right where we + * are so that userspace will call back with exact number of the bad inode and + * we can send back an error code. + * + * Note that if the formatter tells us there's no space left in the buffer we + * move the cursor forward and abort the walk. + */ +STATIC int xfs_bulkstat_one_int( - struct xfs_mount *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + struct xfs_bstat_chunk *bc) { struct xfs_icdinode *dic; /* dinode core info pointer */ struct xfs_inode *ip; /* incore inode pointer */ struct inode *inode; - struct xfs_bstat *buf; /* return buffer */ - int error = 0; /* error value */ + struct xfs_bulkstat *buf = bc->buf; + int error = -EINVAL; - *stat = BULKSTAT_RV_NOTHING; + if (xfs_internal_inum(mp, ino)) + goto out_advance; - if (!buffer || xfs_internal_inum(mp, ino)) - return -EINVAL; - - buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); - if (!buf) - return -ENOMEM; - - error = xfs_iget(mp, NULL, ino, + error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); + if (error == -ENOENT || error == -EINVAL) + goto out_advance; if (error) - goto out_free; + goto out; ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); @@ -64,37 +84,35 @@ xfs_bulkstat_one_int( /* xfs_iget returns the following without needing * further change. */ - buf->bs_projid_lo = dic->di_projid_lo; - buf->bs_projid_hi = dic->di_projid_hi; + buf->bs_projectid = xfs_get_projid(ip); buf->bs_ino = ino; buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; buf->bs_nlink = inode->i_nlink; - buf->bs_atime.tv_sec = inode->i_atime.tv_sec; - buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; - buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; - buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; - buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; - buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; + buf->bs_atime = inode->i_atime.tv_sec; + buf->bs_atime_nsec = inode->i_atime.tv_nsec; + buf->bs_mtime = inode->i_mtime.tv_sec; + buf->bs_mtime_nsec = inode->i_mtime.tv_nsec; + buf->bs_ctime = inode->i_ctime.tv_sec; + buf->bs_ctime_nsec = inode->i_ctime.tv_nsec; + buf->bs_btime = dic->di_crtime.t_sec; + buf->bs_btime_nsec = dic->di_crtime.t_nsec; buf->bs_gen = inode->i_generation; buf->bs_mode = inode->i_mode; buf->bs_xflags = xfs_ip2xflags(ip); - buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; + buf->bs_extsize_blks = dic->di_extsize; buf->bs_extents = dic->di_nextents; - memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); xfs_bulkstat_health(ip, buf); - buf->bs_dmevmask = dic->di_dmevmask; - buf->bs_dmstate = dic->di_dmstate; buf->bs_aextents = dic->di_anextents; buf->bs_forkoff = XFS_IFORK_BOFF(ip); + buf->bs_version = XFS_BULKSTAT_VERSION_V5; if (dic->di_version == 3) { if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) - buf->bs_cowextsize = dic->di_cowextsize << - mp->m_sb.sb_blocklog; + buf->bs_cowextsize_blks = dic->di_cowextsize; } switch (dic->di_format) { @@ -118,385 +136,121 @@ xfs_bulkstat_one_int( xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); - error = formatter(buffer, ubsize, ubused, buf); - if (!error) - *stat = BULKSTAT_RV_DIDONE; + error = bc->formatter(bc->breq, buf); + if (error == XFS_IBULK_ABORT) + goto out_advance; + if (error) + goto out; - out_free: - kmem_free(buf); +out_advance: + /* + * Advance the cursor to the inode that comes after the one we just + * looked at. We want the caller to move along if the bulkstat + * information was copied successfully; if we tried to grab the inode + * but it's no longer allocated; or if it's internal metadata. + */ + bc->breq->startino = ino + 1; +out: return error; } -/* Return 0 on success or positive error */ -STATIC int -xfs_bulkstat_one_fmt( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) -{ - if (ubsize < sizeof(*buffer)) - return -ENOMEM; - if (copy_to_user(ubuffer, buffer, sizeof(*buffer))) - return -EFAULT; - if (ubused) - *ubused = sizeof(*buffer); - return 0; -} - +/* Bulkstat a single inode. */ int xfs_bulkstat_one( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) { - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt, ubused, stat); -} + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; -/* - * Loop over all clusters in a chunk for a given incore inode allocation btree - * record. Do a readahead if there are any allocated inodes in that cluster. - */ -STATIC void -xfs_bulkstat_ichunk_ra( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irec) -{ - xfs_agblock_t agbno; - struct blk_plug plug; - int i; /* inode chunk index */ - - agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); - - blk_start_plug(&plug); - for (i = 0; i < XFS_INODES_PER_CHUNK; - i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) { - if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) & - ~irec->ir_free) { - xfs_btree_reada_bufs(mp, agno, agbno, - mp->m_blocks_per_cluster, - &xfs_inode_buf_ops); - } - } - blk_finish_plug(&plug); -} + ASSERT(breq->icount == 1); -/* - * Lookup the inode chunk that the given inode lives in and then get the record - * if we found the chunk. If the inode was not the last in the chunk and there - * are some left allocated, update the data for the pointed-to record as well as - * return the count of grabbed inodes. - */ -STATIC int -xfs_bulkstat_grab_ichunk( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agino_t agino, /* starting inode of chunk */ - int *icount,/* return # of inodes grabbed */ - struct xfs_inobt_rec_incore *irec) /* btree record */ -{ - int idx; /* index into inode chunk */ - int stat; - int error = 0; - - /* Lookup the inode chunk that this inode lives in */ - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat); - if (error) - return error; - if (!stat) { - *icount = 0; - return error; - } + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), + KM_SLEEP | KM_MAYFAIL); + if (!bc.buf) + return -ENOMEM; - /* Get the record, should always work */ - error = xfs_inobt_get_rec(cur, irec, &stat); - if (error) - return error; - XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1); + error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc); - /* Check if the record contains the inode in request */ - if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) { - *icount = 0; - return 0; - } + kmem_free(bc.buf); - idx = agino - irec->ir_startino + 1; - if (idx < XFS_INODES_PER_CHUNK && - (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { - int i; - - /* We got a right chunk with some left inodes allocated at it. - * Grab the chunk record. Mark all the uninteresting inodes - * free -- because they're before our start point. - */ - for (i = 0; i < idx; i++) { - if (XFS_INOBT_MASK(i) & ~irec->ir_free) - irec->ir_freecount++; - } - - irec->ir_free |= xfs_inobt_maskn(0, idx); - *icount = irec->ir_count - irec->ir_freecount; - } + /* + * If we reported one inode to userspace then we abort because we hit + * the end of the buffer. Don't leak that back to userspace. + */ + if (error == XFS_IWALK_ABORT) + error = 0; - return 0; + return error; } -#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) - -struct xfs_bulkstat_agichunk { - char __user **ac_ubuffer;/* pointer into user's buffer */ - int ac_ubleft; /* bytes left in user's buffer */ - int ac_ubelem; /* spaces used in user's buffer */ -}; - -/* - * Process inodes in chunk with a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - */ static int -xfs_bulkstat_ag_ichunk( - struct xfs_mount *mp, - xfs_agnumber_t agno, - struct xfs_inobt_rec_incore *irbp, - bulkstat_one_pf formatter, - size_t statstruct_size, - struct xfs_bulkstat_agichunk *acp, - xfs_agino_t *last_agino) +xfs_bulkstat_iwalk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { - char __user **ubufp = acp->ac_ubuffer; - int chunkidx; - int error = 0; - xfs_agino_t agino = irbp->ir_startino; - - for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK; - chunkidx++, agino++) { - int fmterror; - int ubused; - - /* inode won't fit in buffer, we are done */ - if (acp->ac_ubleft < statstruct_size) - break; - - /* Skip if this inode is free */ - if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) - continue; - - /* Get the inode and fill in a single buffer */ - ubused = statstruct_size; - error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino), - *ubufp, acp->ac_ubleft, &ubused, &fmterror); - - if (fmterror == BULKSTAT_RV_GIVEUP || - (error && error != -ENOENT && error != -EINVAL)) { - acp->ac_ubleft = 0; - ASSERT(error); - break; - } - - /* be careful not to leak error if at end of chunk */ - if (fmterror == BULKSTAT_RV_NOTHING || error) { - error = 0; - continue; - } - - *ubufp += ubused; - acp->ac_ubleft -= ubused; - acp->ac_ubelem++; - } - - /* - * Post-update *last_agino. At this point, agino will always point one - * inode past the last inode we processed successfully. Hence we - * substract that inode when setting the *last_agino cursor so that we - * return the correct cookie to userspace. On the next bulkstat call, - * the inode under the lastino cookie will be skipped as we have already - * processed it here. - */ - *last_agino = agino - 1; + int error; + error = xfs_bulkstat_one_int(mp, tp, ino, data); + /* bulkstat just skips over missing inodes */ + if (error == -ENOENT || error == -EINVAL) + return 0; return error; } /* - * Return stat information in bulk (by-inode) for the filesystem. + * Check the incoming lastino parameter. + * + * We allow any inode value that could map to physical space inside the + * filesystem because if there are no inodes there, bulkstat moves on to the + * next chunk. In other words, the magic agino value of zero takes us to the + * first chunk in the AG, and an agino value past the end of the AG takes us to + * the first chunk in the next AG. + * + * Therefore we can end early if the requested inode is beyond the end of the + * filesystem or doesn't map properly. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastinop, /* last inode returned */ - int *ubcountp, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size, /* sizeof struct filling */ - char __user *ubuffer, /* buffer with inode stats */ - int *done) /* 1 if there are more stats to get */ +static inline bool +xfs_bulkstat_already_done( + struct xfs_mount *mp, + xfs_ino_t startino) { - xfs_buf_t *agbp; /* agi header buffer */ - xfs_agino_t agino; /* inode # in allocation group */ - xfs_agnumber_t agno; /* allocation group number */ - xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ - int nirbuf; /* size of irbuf */ - int ubcount; /* size of user's buffer */ - struct xfs_bulkstat_agichunk ac; - int error = 0; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino); - /* - * Get the last inode value, see if there's nothing to do. - */ - agno = XFS_INO_TO_AGNO(mp, *lastinop); - agino = XFS_INO_TO_AGINO(mp, *lastinop); - if (agno >= mp->m_sb.sb_agcount || - *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) { - *done = 1; - *ubcountp = 0; - return 0; - } + return agno >= mp->m_sb.sb_agcount || + startino != XFS_AGINO_TO_INO(mp, agno, agino); +} - ubcount = *ubcountp; /* statstruct's */ - ac.ac_ubuffer = &ubuffer; - ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; - ac.ac_ubelem = 0; +/* Return stat information in bulk (by-inode) for the filesystem. */ +int +xfs_bulkstat( + struct xfs_ibulk *breq, + bulkstat_one_fmt_pf formatter) +{ + struct xfs_bstat_chunk bc = { + .formatter = formatter, + .breq = breq, + }; + int error; - *ubcountp = 0; - *done = 0; + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; - irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); - if (!irbuf) + bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat), + KM_SLEEP | KM_MAYFAIL); + if (!bc.buf) return -ENOMEM; - nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); - /* - * Loop over the allocation groups, starting from the last - * inode returned; 0 means start of the allocation group. - */ - while (agno < mp->m_sb.sb_agcount) { - struct xfs_inobt_rec_incore *irbp = irbuf; - struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf; - bool end_of_ag = false; - int icount = 0; - int stat; - - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) - break; - /* - * Allocate and initialize a btree cursor for ialloc btree. - */ - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO); - if (agino > 0) { - /* - * In the middle of an allocation group, we need to get - * the remainder of the chunk we're in. - */ - struct xfs_inobt_rec_incore r; - - error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r); - if (error) - goto del_cursor; - if (icount) { - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - } - /* Increment to the next record */ - error = xfs_btree_increment(cur, 0, &stat); - } else { - /* Start of ag. Lookup the first inode chunk */ - error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat); - } - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - - /* - * Loop through inode btree records in this ag, - * until we run out of inodes or space in the buffer. - */ - while (irbp < irbufend && icount < ubcount) { - struct xfs_inobt_rec_incore r; - - error = xfs_inobt_get_rec(cur, &r, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - - /* - * If this chunk has any allocated inodes, save it. - * Also start read-ahead now for this chunk. - */ - if (r.ir_freecount < r.ir_count) { - xfs_bulkstat_ichunk_ra(mp, agno, &r); - irbp->ir_startino = r.ir_startino; - irbp->ir_holemask = r.ir_holemask; - irbp->ir_count = r.ir_count; - irbp->ir_freecount = r.ir_freecount; - irbp->ir_free = r.ir_free; - irbp++; - icount += r.ir_count - r.ir_freecount; - } - error = xfs_btree_increment(cur, 0, &stat); - if (error || stat == 0) { - end_of_ag = true; - goto del_cursor; - } - cond_resched(); - } - - /* - * Drop the btree buffers and the agi buffer as we can't hold any - * of the locks these represent when calling iget. If there is a - * pending error, then we are done. - */ -del_cursor: - xfs_btree_del_cursor(cur, error); - xfs_buf_relse(agbp); - if (error) - break; - /* - * Now format all the good inodes into the user's buffer. The - * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer - * for the next loop iteration. - */ - irbufend = irbp; - for (irbp = irbuf; - irbp < irbufend && ac.ac_ubleft >= statstruct_size; - irbp++) { - error = xfs_bulkstat_ag_ichunk(mp, agno, irbp, - formatter, statstruct_size, &ac, - &agino); - if (error) - break; - - cond_resched(); - } - - /* - * If we've run out of space or had a formatting error, we - * are now done - */ - if (ac.ac_ubleft < statstruct_size || error) - break; - - if (end_of_ag) { - agno++; - agino = 0; - } - } - /* - * Done, we're either out of filesystem or space to put the data. - */ - kmem_free(irbuf); - *ubcountp = ac.ac_ubelem; + error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags, + xfs_bulkstat_iwalk, breq->icount, &bc); + + kmem_free(bc.buf); /* * We found some inodes, so clear the error status and return them. @@ -505,135 +259,136 @@ del_cursor: * triggered again and propagated to userspace as there will be no * formatted inodes in the buffer. */ - if (ac.ac_ubelem) + if (breq->ocount > 0) error = 0; - /* - * If we ran out of filesystem, lastino will point off the end of - * the filesystem so the next call will return immediately. - */ - *lastinop = XFS_AGINO_TO_INO(mp, agno, agino); - if (agno >= mp->m_sb.sb_agcount) - *done = 1; - return error; } -int -xfs_inumbers_fmt( - void __user *ubuffer, /* buffer to write to */ - const struct xfs_inogrp *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written) /* # of bytes written */ +/* Convert bulkstat (v5) to bstat (v1). */ +void +xfs_bulkstat_to_bstat( + struct xfs_mount *mp, + struct xfs_bstat *bs1, + const struct xfs_bulkstat *bstat) { - if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) - return -EFAULT; - *written = count * sizeof(*buffer); - return 0; + memset(bs1, 0, sizeof(struct xfs_bstat)); + bs1->bs_ino = bstat->bs_ino; + bs1->bs_mode = bstat->bs_mode; + bs1->bs_nlink = bstat->bs_nlink; + bs1->bs_uid = bstat->bs_uid; + bs1->bs_gid = bstat->bs_gid; + bs1->bs_rdev = bstat->bs_rdev; + bs1->bs_blksize = bstat->bs_blksize; + bs1->bs_size = bstat->bs_size; + bs1->bs_atime.tv_sec = bstat->bs_atime; + bs1->bs_mtime.tv_sec = bstat->bs_mtime; + bs1->bs_ctime.tv_sec = bstat->bs_ctime; + bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec; + bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec; + bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec; + bs1->bs_blocks = bstat->bs_blocks; + bs1->bs_xflags = bstat->bs_xflags; + bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks); + bs1->bs_extents = bstat->bs_extents; + bs1->bs_gen = bstat->bs_gen; + bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF; + bs1->bs_forkoff = bstat->bs_forkoff; + bs1->bs_projid_hi = bstat->bs_projectid >> 16; + bs1->bs_sick = bstat->bs_sick; + bs1->bs_checked = bstat->bs_checked; + bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks); + bs1->bs_dmevmask = 0; + bs1->bs_dmstate = 0; + bs1->bs_aextents = bstat->bs_aextents; +} + +struct xfs_inumbers_chunk { + inumbers_fmt_pf formatter; + struct xfs_ibulk *breq; +}; + +/* + * INUMBERS + * ======== + * This is how we export inode btree records to userspace, so that XFS tools + * can figure out where inodes are allocated. + */ + +/* + * Format the inode group structure and report it somewhere. + * + * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk + * through the filesystem so we move it forward unless there was a runtime + * error. If the formatter tells us the buffer is now full we also move the + * cursor forward and abort the walk. + */ +STATIC int +xfs_inumbers_walk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_agnumber_t agno, + const struct xfs_inobt_rec_incore *irec, + void *data) +{ + struct xfs_inumbers inogrp = { + .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino), + .xi_alloccount = irec->ir_count - irec->ir_freecount, + .xi_allocmask = ~irec->ir_free, + .xi_version = XFS_INUMBERS_VERSION_V5, + }; + struct xfs_inumbers_chunk *ic = data; + int error; + + error = ic->formatter(ic->breq, &inogrp); + if (error && error != XFS_IBULK_ABORT) + return error; + + ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) + + XFS_INODES_PER_CHUNK; + return error; } /* * Return inode number table for the filesystem. */ -int /* error status */ +int xfs_inumbers( - struct xfs_mount *mp,/* mount point for filesystem */ - xfs_ino_t *lastino,/* last inode returned */ - int *count,/* size of buffer/count returned */ - void __user *ubuffer,/* buffer with inode descriptions */ + struct xfs_ibulk *breq, inumbers_fmt_pf formatter) { - xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); - struct xfs_btree_cur *cur = NULL; - struct xfs_buf *agbp = NULL; - struct xfs_inogrp *buffer; - int bcount; - int left = *count; - int bufidx = 0; + struct xfs_inumbers_chunk ic = { + .formatter = formatter, + .breq = breq, + }; int error = 0; - *count = 0; - if (agno >= mp->m_sb.sb_agcount || - *lastino != XFS_AGINO_TO_INO(mp, agno, agino)) - return error; + if (xfs_bulkstat_already_done(breq->mp, breq->startino)) + return 0; - bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer))); - buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); - do { - struct xfs_inobt_rec_incore r; - int stat; - - if (!agbp) { - error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp); - if (error) - break; - - cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, - XFS_BTNUM_INO); - error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE, - &stat); - if (error) - break; - if (!stat) - goto next_ag; - } - - error = xfs_inobt_get_rec(cur, &r, &stat); - if (error) - break; - if (!stat) - goto next_ag; - - agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1; - buffer[bufidx].xi_startino = - XFS_AGINO_TO_INO(mp, agno, r.ir_startino); - buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount; - buffer[bufidx].xi_allocmask = ~r.ir_free; - if (++bufidx == bcount) { - long written; - - error = formatter(ubuffer, buffer, bufidx, &written); - if (error) - break; - ubuffer += written; - *count += bufidx; - bufidx = 0; - } - if (!--left) - break; - - error = xfs_btree_increment(cur, 0, &stat); - if (error) - break; - if (stat) - continue; - -next_ag: - xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); - cur = NULL; - xfs_buf_relse(agbp); - agbp = NULL; - agino = 0; - agno++; - } while (agno < mp->m_sb.sb_agcount); - - if (!error) { - if (bufidx) { - long written; - - error = formatter(ubuffer, buffer, bufidx, &written); - if (!error) - *count += bufidx; - } - *lastino = XFS_AGINO_TO_INO(mp, agno, agino); - } + error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags, + xfs_inumbers_walk, breq->icount, &ic); - kmem_free(buffer); - if (cur) - xfs_btree_del_cursor(cur, error); - if (agbp) - xfs_buf_relse(agbp); + /* + * We found some inode groups, so clear the error status and return + * them. The lastino pointer will point directly at the inode that + * triggered any error that occurred, so on the next call the error + * will be triggered again and propagated to userspace as there will be + * no formatted inode groups in the buffer. + */ + if (breq->ocount > 0) + error = 0; return error; } + +/* Convert an inumbers (v5) struct to a inogrp (v1) struct. */ +void +xfs_inumbers_to_inogrp( + struct xfs_inogrp *ig1, + const struct xfs_inumbers *ig) +{ + ig1->xi_startino = ig->xi_startino; + ig1->xi_alloccount = ig->xi_alloccount; + ig1->xi_allocmask = ig->xi_allocmask; +} diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 8a822285b671..e90c1fc5b981 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -5,83 +5,55 @@ #ifndef __XFS_ITABLE_H__ #define __XFS_ITABLE_H__ -/* - * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat - * structures (by the dmi library). This is a pointer to a formatter function - * that will iget the inode and fill in the appropriate structure. - * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c - */ -typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +/* In-memory representation of a userspace request for batch inode data. */ +struct xfs_ibulk { + struct xfs_mount *mp; + void __user *ubuffer; /* user output buffer */ + xfs_ino_t startino; /* start with this inode */ + unsigned int icount; /* number of elements in ubuffer */ + unsigned int ocount; /* number of records returned */ + unsigned int flags; /* see XFS_IBULK_FLAG_* */ +}; + +/* Only iterate within the same AG as startino */ +#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG) + +/* Return value that means we want to abort the walk. */ +#define XFS_IBULK_ABORT (XFS_IWALK_ABORT) /* - * Values for stat return value. + * Advance the user buffer pointer by one record of the given size. If the + * buffer is now full, return the appropriate error code. */ -#define BULKSTAT_RV_NOTHING 0 -#define BULKSTAT_RV_DIDONE 1 -#define BULKSTAT_RV_GIVEUP 2 +static inline int +xfs_ibulk_advance( + struct xfs_ibulk *breq, + size_t bytes) +{ + char __user *b = breq->ubuffer; + + breq->ubuffer = b + bytes; + breq->ocount++; + return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0; +} /* * Return stat information in bulk (by-inode) for the filesystem. */ -int /* error status */ -xfs_bulkstat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *lastino, /* last inode returned */ - int *count, /* size of buffer/count returned */ - bulkstat_one_pf formatter, /* func that'd fill a single buf */ - size_t statstruct_size,/* sizeof struct that we're filling */ - char __user *ubuffer,/* buffer with inode stats */ - int *done); /* 1 if there are more stats to get */ - -typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */ - void __user *ubuffer, /* buffer to write to */ - int ubsize, /* remaining user buffer sz */ - int *ubused, /* bytes used by formatter */ - const xfs_bstat_t *buffer); /* buffer to read from */ - -int -xfs_bulkstat_one_int( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - bulkstat_one_fmt_pf formatter, - int *ubused, - int *stat); -int -xfs_bulkstat_one( - xfs_mount_t *mp, - xfs_ino_t ino, - void __user *buffer, - int ubsize, - int *ubused, - int *stat); +typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq, + const struct xfs_bulkstat *bstat); -typedef int (*inumbers_fmt_pf)( - void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written); /* # of bytes written */ +int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); +int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter); +void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1, + const struct xfs_bulkstat *bstat); -int -xfs_inumbers_fmt( - void __user *ubuffer, /* buffer to write to */ - const xfs_inogrp_t *buffer, /* buffer to read from */ - long count, /* # of elements to read */ - long *written); /* # of bytes written */ +typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq, + const struct xfs_inumbers *igrp); -int /* error status */ -xfs_inumbers( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t *last, /* last inode returned */ - int *count, /* size of buffer/count returned */ - void __user *buffer, /* buffer with inode info */ - inumbers_fmt_pf formatter); +int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter); +void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1, + const struct xfs_inumbers *ig); #endif /* __XFS_ITABLE_H__ */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c new file mode 100644 index 000000000000..8c7d727149ea --- /dev/null +++ b/fs/xfs/xfs_iwalk.c @@ -0,0 +1,720 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h" +#include "xfs_iwalk.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_icache.h" +#include "xfs_health.h" +#include "xfs_trans.h" +#include "xfs_pwork.h" + +/* + * Walking Inodes in the Filesystem + * ================================ + * + * This iterator function walks a subset of filesystem inodes in increasing + * order from @startino until there are no more inodes. For each allocated + * inode it finds, it calls a walk function with the relevant inode number and + * a pointer to caller-provided data. The walk function can return the usual + * negative error code to stop the iteration; 0 to continue the iteration; or + * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the + * caller. + * + * Internally, we allow the walk function to do anything, which means that we + * cannot maintain the inobt cursor or our lock on the AGI buffer. We + * therefore cache the inobt records in kernel memory and only call the walk + * function when our memory buffer is full. @nr_recs is the number of records + * that we've cached, and @sz_recs is the size of our cache. + * + * It is the responsibility of the walk function to ensure it accesses + * allocated inodes, as the inobt records may be stale by the time they are + * acted upon. + */ + +struct xfs_iwalk_ag { + /* parallel work control data; will be null if single threaded */ + struct xfs_pwork pwork; + + struct xfs_mount *mp; + struct xfs_trans *tp; + + /* Where do we start the traversal? */ + xfs_ino_t startino; + + /* Array of inobt records we cache. */ + struct xfs_inobt_rec_incore *recs; + + /* Number of entries allocated for the @recs array. */ + unsigned int sz_recs; + + /* Number of entries in the @recs array that are in use. */ + unsigned int nr_recs; + + /* Inode walk function and data pointer. */ + xfs_iwalk_fn iwalk_fn; + xfs_inobt_walk_fn inobt_walk_fn; + void *data; + + /* + * Make it look like the inodes up to startino are free so that + * bulkstat can start its inode iteration at the correct place without + * needing to special case everywhere. + */ + unsigned int trim_start:1; + + /* Skip empty inobt records? */ + unsigned int skip_empty:1; +}; + +/* + * Loop over all clusters in a chunk for a given incore inode allocation btree + * record. Do a readahead if there are any allocated inodes in that cluster. + */ +STATIC void +xfs_iwalk_ichunk_ra( + struct xfs_mount *mp, + xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec) +{ + struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_agblock_t agbno; + struct blk_plug plug; + int i; /* inode chunk index */ + + agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino); + + blk_start_plug(&plug); + for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) { + xfs_inofree_t imask; + + imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster); + if (imask & ~irec->ir_free) { + xfs_btree_reada_bufs(mp, agno, agbno, + igeo->blocks_per_cluster, + &xfs_inode_buf_ops); + } + agbno += igeo->blocks_per_cluster; + } + blk_finish_plug(&plug); +} + +/* + * Set the bits in @irec's free mask that correspond to the inodes before + * @agino so that we skip them. This is how we restart an inode walk that was + * interrupted in the middle of an inode record. + */ +STATIC void +xfs_iwalk_adjust_start( + xfs_agino_t agino, /* starting inode of chunk */ + struct xfs_inobt_rec_incore *irec) /* btree record */ +{ + int idx; /* index into inode chunk */ + int i; + + idx = agino - irec->ir_startino; + + /* + * We got a right chunk with some left inodes allocated at it. Grab + * the chunk record. Mark all the uninteresting inodes free because + * they're before our start point. + */ + for (i = 0; i < idx; i++) { + if (XFS_INOBT_MASK(i) & ~irec->ir_free) + irec->ir_freecount++; + } + + irec->ir_free |= xfs_inobt_maskn(0, idx); +} + +/* Allocate memory for a walk. */ +STATIC int +xfs_iwalk_alloc( + struct xfs_iwalk_ag *iwag) +{ + size_t size; + + ASSERT(iwag->recs == NULL); + iwag->nr_recs = 0; + + /* Allocate a prefetch buffer for inobt records. */ + size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore); + iwag->recs = kmem_alloc(size, KM_MAYFAIL); + if (iwag->recs == NULL) + return -ENOMEM; + + return 0; +} + +/* Free memory we allocated for a walk. */ +STATIC void +xfs_iwalk_free( + struct xfs_iwalk_ag *iwag) +{ + kmem_free(iwag->recs); + iwag->recs = NULL; +} + +/* For each inuse inode in each cached inobt record, call our function. */ +STATIC int +xfs_iwalk_ag_recs( + struct xfs_iwalk_ag *iwag) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + xfs_ino_t ino; + unsigned int i, j; + xfs_agnumber_t agno; + int error; + + agno = XFS_INO_TO_AGNO(mp, iwag->startino); + for (i = 0; i < iwag->nr_recs; i++) { + struct xfs_inobt_rec_incore *irec = &iwag->recs[i]; + + trace_xfs_iwalk_ag_rec(mp, agno, irec); + + if (xfs_pwork_want_abort(&iwag->pwork)) + return 0; + + if (iwag->inobt_walk_fn) { + error = iwag->inobt_walk_fn(mp, tp, agno, irec, + iwag->data); + if (error) + return error; + } + + if (!iwag->iwalk_fn) + continue; + + for (j = 0; j < XFS_INODES_PER_CHUNK; j++) { + if (xfs_pwork_want_abort(&iwag->pwork)) + return 0; + + /* Skip if this inode is free */ + if (XFS_INOBT_MASK(j) & irec->ir_free) + continue; + + /* Otherwise call our function. */ + ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j); + error = iwag->iwalk_fn(mp, tp, ino, iwag->data); + if (error) + return error; + } + } + + return 0; +} + +/* Delete cursor and let go of AGI. */ +static inline void +xfs_iwalk_del_inobt( + struct xfs_trans *tp, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int error) +{ + if (*curpp) { + xfs_btree_del_cursor(*curpp, error); + *curpp = NULL; + } + if (*agi_bpp) { + xfs_trans_brelse(tp, *agi_bpp); + *agi_bpp = NULL; + } +} + +/* + * Set ourselves up for walking inobt records starting from a given point in + * the filesystem. + * + * If caller passed in a nonzero start inode number, load the record from the + * inobt and make the record look like all the inodes before agino are free so + * that we skip them, and then move the cursor to the next inobt record. This + * is how we support starting an iwalk in the middle of an inode chunk. + * + * If the caller passed in a start number of zero, move the cursor to the first + * inobt record. + * + * The caller is responsible for cleaning up the cursor and buffer pointer + * regardless of the error status. + */ +STATIC int +xfs_iwalk_ag_start( + struct xfs_iwalk_ag *iwag, + xfs_agnumber_t agno, + xfs_agino_t agino, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int *has_more) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_inobt_rec_incore *irec; + int error; + + /* Set up a fresh cursor and empty the inobt cache. */ + iwag->nr_recs = 0; + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); + if (error) + return error; + + /* Starting at the beginning of the AG? That's easy! */ + if (agino == 0) + return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more); + + /* + * Otherwise, we have to grab the inobt record where we left off, stuff + * the record into our cache, and then see if there are more records. + * We require a lookup cache of at least two elements so that the + * caller doesn't have to deal with tearing down the cursor to walk the + * records. + */ + error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more); + if (error) + return error; + + /* + * If the LE lookup at @agino yields no records, jump ahead to the + * inobt cursor increment to see if there are more records to process. + */ + if (!*has_more) + goto out_advance; + + /* Get the record, should always work */ + irec = &iwag->recs[iwag->nr_recs]; + error = xfs_inobt_get_rec(*curpp, irec, has_more); + if (error) + return error; + XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1); + + /* + * If the LE lookup yielded an inobt record before the cursor position, + * skip it and see if there's another one after it. + */ + if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) + goto out_advance; + + /* + * If agino fell in the middle of the inode record, make it look like + * the inodes up to agino are free so that we don't return them again. + */ + if (iwag->trim_start) + xfs_iwalk_adjust_start(agino, irec); + + /* + * The prefetch calculation is supposed to give us a large enough inobt + * record cache that grab_ichunk can stage a partial first record and + * the loop body can cache a record without having to check for cache + * space until after it reads an inobt record. + */ + iwag->nr_recs++; + ASSERT(iwag->nr_recs < iwag->sz_recs); + +out_advance: + return xfs_btree_increment(*curpp, 0, has_more); +} + +/* + * The inobt record cache is full, so preserve the inobt cursor state and + * run callbacks on the cached inobt records. When we're done, restore the + * cursor state to wherever the cursor would have been had the cache not been + * full (and therefore we could've just incremented the cursor) if *@has_more + * is true. On exit, *@has_more will indicate whether or not the caller should + * try for more inode records. + */ +STATIC int +xfs_iwalk_run_callbacks( + struct xfs_iwalk_ag *iwag, + xfs_agnumber_t agno, + struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp, + int *has_more) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_inobt_rec_incore *irec; + xfs_agino_t restart; + int error; + + ASSERT(iwag->nr_recs > 0); + + /* Delete cursor but remember the last record we cached... */ + xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); + irec = &iwag->recs[iwag->nr_recs - 1]; + restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; + + error = xfs_iwalk_ag_recs(iwag); + if (error) + return error; + + /* ...empty the cache... */ + iwag->nr_recs = 0; + + if (!has_more) + return 0; + + /* ...and recreate the cursor just past where we left off. */ + error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp); + if (error) + return error; + + return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); +} + +/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ +STATIC int +xfs_iwalk_ag( + struct xfs_iwalk_ag *iwag) +{ + struct xfs_mount *mp = iwag->mp; + struct xfs_trans *tp = iwag->tp; + struct xfs_buf *agi_bp = NULL; + struct xfs_btree_cur *cur = NULL; + xfs_agnumber_t agno; + xfs_agino_t agino; + int has_more; + int error = 0; + + /* Set up our cursor at the right place in the inode btree. */ + agno = XFS_INO_TO_AGNO(mp, iwag->startino); + agino = XFS_INO_TO_AGINO(mp, iwag->startino); + error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more); + + while (!error && has_more) { + struct xfs_inobt_rec_incore *irec; + + cond_resched(); + if (xfs_pwork_want_abort(&iwag->pwork)) + goto out; + + /* Fetch the inobt record. */ + irec = &iwag->recs[iwag->nr_recs]; + error = xfs_inobt_get_rec(cur, irec, &has_more); + if (error || !has_more) + break; + + /* No allocated inodes in this chunk; skip it. */ + if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { + error = xfs_btree_increment(cur, 0, &has_more); + if (error) + break; + continue; + } + + /* + * Start readahead for this inode chunk in anticipation of + * walking the inodes. + */ + if (iwag->iwalk_fn) + xfs_iwalk_ichunk_ra(mp, agno, irec); + + /* + * If there's space in the buffer for more records, increment + * the btree cursor and grab more. + */ + if (++iwag->nr_recs < iwag->sz_recs) { + error = xfs_btree_increment(cur, 0, &has_more); + if (error || !has_more) + break; + continue; + } + + /* + * Otherwise, we need to save cursor state and run the callback + * function on the cached records. The run_callbacks function + * is supposed to return a cursor pointing to the record where + * we would be if we had been able to increment like above. + */ + ASSERT(has_more); + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, + &has_more); + } + + if (iwag->nr_recs == 0 || error) + goto out; + + /* Walk the unprocessed records in the cache. */ + error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more); + +out: + xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error); + return error; +} + +/* + * We experimentally determined that the reduction in ioctl call overhead + * diminishes when userspace asks for more than 2048 inodes, so we'll cap + * prefetch at this point. + */ +#define IWALK_MAX_INODE_PREFETCH (2048U) + +/* + * Given the number of inodes to prefetch, set the number of inobt records that + * we cache in memory, which controls the number of inodes we try to read + * ahead. Set the maximum if @inodes == 0. + */ +static inline unsigned int +xfs_iwalk_prefetch( + unsigned int inodes) +{ + unsigned int inobt_records; + + /* + * If the caller didn't tell us the number of inodes they wanted, + * assume the maximum prefetch possible for best performance. + * Otherwise, cap prefetch at that maximum so that we don't start an + * absurd amount of prefetch. + */ + if (inodes == 0) + inodes = IWALK_MAX_INODE_PREFETCH; + inodes = min(inodes, IWALK_MAX_INODE_PREFETCH); + + /* Round the inode count up to a full chunk. */ + inodes = round_up(inodes, XFS_INODES_PER_CHUNK); + + /* + * In order to convert the number of inodes to prefetch into an + * estimate of the number of inobt records to cache, we require a + * conversion factor that reflects our expectations of the average + * loading factor of an inode chunk. Based on data gathered, most + * (but not all) filesystems manage to keep the inode chunks totally + * full, so we'll underestimate slightly so that our readahead will + * still deliver the performance we want on aging filesystems: + * + * inobt = inodes / (INODES_PER_CHUNK * (4 / 5)); + * + * The funny math is to avoid integer division. + */ + inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK); + + /* + * Allocate enough space to prefetch at least two inobt records so that + * we can cache both the record where the iwalk started and the next + * record. This simplifies the AG inode walk loop setup code. + */ + return max(inobt_records, 2U); +} + +/* + * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn + * will be called for each allocated inode, being passed the inode's number and + * @data. @max_prefetch controls how many inobt records' worth of inodes we + * try to readahead. + */ +int +xfs_iwalk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t startino, + unsigned int flags, + xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, + void *data) +{ + struct xfs_iwalk_ag iwag = { + .mp = mp, + .tp = tp, + .iwalk_fn = iwalk_fn, + .data = data, + .startino = startino, + .sz_recs = xfs_iwalk_prefetch(inode_records), + .trim_start = 1, + .skip_empty = 1, + .pwork = XFS_PWORK_SINGLE_THREADED, + }; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); + + error = xfs_iwalk_alloc(&iwag); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_iwalk_ag(&iwag); + if (error) + break; + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + xfs_iwalk_free(&iwag); + return error; +} + +/* Run per-thread iwalk work. */ +static int +xfs_iwalk_ag_work( + struct xfs_mount *mp, + struct xfs_pwork *pwork) +{ + struct xfs_iwalk_ag *iwag; + int error = 0; + + iwag = container_of(pwork, struct xfs_iwalk_ag, pwork); + if (xfs_pwork_want_abort(pwork)) + goto out; + + error = xfs_iwalk_alloc(iwag); + if (error) + goto out; + + error = xfs_iwalk_ag(iwag); + xfs_iwalk_free(iwag); +out: + kmem_free(iwag); + return error; +} + +/* + * Walk all the inodes in the filesystem using multiple threads to process each + * AG. + */ +int +xfs_iwalk_threaded( + struct xfs_mount *mp, + xfs_ino_t startino, + unsigned int flags, + xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, + bool polled, + void *data) +{ + struct xfs_pwork_ctl pctl; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + unsigned int nr_threads; + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); + + nr_threads = xfs_pwork_guess_datadev_parallelism(mp); + error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk", + nr_threads); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + struct xfs_iwalk_ag *iwag; + + if (xfs_pwork_ctl_want_abort(&pctl)) + break; + + iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP); + iwag->mp = mp; + iwag->iwalk_fn = iwalk_fn; + iwag->data = data; + iwag->startino = startino; + iwag->sz_recs = xfs_iwalk_prefetch(inode_records); + xfs_pwork_queue(&pctl, &iwag->pwork); + startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + if (polled) + xfs_pwork_poll(&pctl); + return xfs_pwork_destroy(&pctl); +} + +/* + * Allow callers to cache up to a page's worth of inobt records. This reflects + * the existing inumbers prefetching behavior. Since the inobt walk does not + * itself do anything with the inobt records, we can set a fairly high limit + * here. + */ +#define MAX_INOBT_WALK_PREFETCH \ + (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore)) + +/* + * Given the number of records that the user wanted, set the number of inobt + * records that we buffer in memory. Set the maximum if @inobt_records == 0. + */ +static inline unsigned int +xfs_inobt_walk_prefetch( + unsigned int inobt_records) +{ + /* + * If the caller didn't tell us the number of inobt records they + * wanted, assume the maximum prefetch possible for best performance. + */ + if (inobt_records == 0) + inobt_records = MAX_INOBT_WALK_PREFETCH; + + /* + * Allocate enough space to prefetch at least two inobt records so that + * we can cache both the record where the iwalk started and the next + * record. This simplifies the AG inode walk loop setup code. + */ + inobt_records = max(inobt_records, 2U); + + /* + * Cap prefetch at that maximum so that we don't use an absurd amount + * of memory. + */ + return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH); +} + +/* + * Walk all inode btree records in the filesystem starting from @startino. The + * @inobt_walk_fn will be called for each btree record, being passed the incore + * record and @data. @max_prefetch controls how many inobt records we try to + * cache ahead of time. + */ +int +xfs_inobt_walk( + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t startino, + unsigned int flags, + xfs_inobt_walk_fn inobt_walk_fn, + unsigned int inobt_records, + void *data) +{ + struct xfs_iwalk_ag iwag = { + .mp = mp, + .tp = tp, + .inobt_walk_fn = inobt_walk_fn, + .data = data, + .startino = startino, + .sz_recs = xfs_inobt_walk_prefetch(inobt_records), + .pwork = XFS_PWORK_SINGLE_THREADED, + }; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); + int error; + + ASSERT(agno < mp->m_sb.sb_agcount); + ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL)); + + error = xfs_iwalk_alloc(&iwag); + if (error) + return error; + + for (; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_iwalk_ag(&iwag); + if (error) + break; + iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); + if (flags & XFS_INOBT_WALK_SAME_AG) + break; + } + + xfs_iwalk_free(&iwag); + return error; +} diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h new file mode 100644 index 000000000000..6c960e10ed4d --- /dev/null +++ b/fs/xfs/xfs_iwalk.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_IWALK_H__ +#define __XFS_IWALK_H__ + +/* Walk all inodes in the filesystem starting from @startino. */ +typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_ino_t ino, void *data); +/* Return values for xfs_iwalk_fn. */ +#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE) +#define XFS_IWALK_ABORT (XFS_ITER_ABORT) + +int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino, + unsigned int flags, xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, void *data); +int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino, + unsigned int flags, xfs_iwalk_fn iwalk_fn, + unsigned int inode_records, bool poll, void *data); + +/* Only iterate inodes within the same AG as @startino. */ +#define XFS_IWALK_SAME_AG (0x1) + +#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG) + +/* Walk all inode btree records in the filesystem starting from @startino. */ +typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_agnumber_t agno, + const struct xfs_inobt_rec_incore *irec, + void *data); +/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */ +#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT) + +int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_ino_t startino, unsigned int flags, + xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records, + void *data); + +/* Only iterate inobt records within the same AG as @startino. */ +#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG) + +#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG) + +#endif /* __XFS_IWALK_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index edbd5a210df2..ca15105681ca 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -110,8 +110,6 @@ typedef __u32 xfs_nlink_t; #define current_restore_flags_nested(sp, f) \ (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) -#define spinlock_destroy(lock) - #define NBBY 8 /* number of bits per byte */ /* @@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y) return x; } +int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count, + char *data, unsigned int op); + #define ASSERT_ALWAYS(expr) \ (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 2466b0f5b6c4..00e9f5c388d3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -16,11 +16,7 @@ #include "xfs_trans_priv.h" #include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_log_recover.h" -#include "xfs_inode.h" #include "xfs_trace.h" -#include "xfs_fsops.h" -#include "xfs_cksum.h" #include "xfs_sysfs.h" #include "xfs_sb.h" #include "xfs_health.h" @@ -45,21 +41,14 @@ STATIC int xlog_space_left( struct xlog *log, atomic64_t *head); -STATIC int -xlog_sync( - struct xlog *log, - struct xlog_in_core *iclog); STATIC void xlog_dealloc_log( struct xlog *log); /* local state machine functions */ -STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); -STATIC void -xlog_state_do_callback( - struct xlog *log, - int aborted, - struct xlog_in_core *iclog); +STATIC void xlog_state_done_syncing( + struct xlog_in_core *iclog, + bool aborted); STATIC int xlog_state_get_iclog_space( struct xlog *log, @@ -107,8 +96,7 @@ STATIC void xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, - int count, - bool syncing); + int count); STATIC void xlog_verify_tail_lsn( struct xlog *log, @@ -117,7 +105,7 @@ xlog_verify_tail_lsn( #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) -#define xlog_verify_iclog(a,b,c,d) +#define xlog_verify_iclog(a,b,c) #define xlog_verify_tail_lsn(a,b,c) #endif @@ -541,32 +529,6 @@ xfs_log_done( return lsn; } -/* - * Attaches a new iclog I/O completion callback routine during - * transaction commit. If the log is in error state, a non-zero - * return code is handed back and the caller is responsible for - * executing the callback at an appropriate time. - */ -int -xfs_log_notify( - struct xlog_in_core *iclog, - xfs_log_callback_t *cb) -{ - int abortflg; - - spin_lock(&iclog->ic_callback_lock); - abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); - if (!abortflg) { - ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || - (iclog->ic_state == XLOG_STATE_WANT_SYNC)); - cb->cb_next = NULL; - *(iclog->ic_callback_tail) = cb; - iclog->ic_callback_tail = &(cb->cb_next); - } - spin_unlock(&iclog->ic_callback_lock); - return abortflg; -} - int xfs_log_release_iclog( struct xfs_mount *mp, @@ -807,16 +769,12 @@ xfs_log_mount_finish( * The mount has failed. Cancel the recovery if it hasn't completed and destroy * the log. */ -int +void xfs_log_mount_cancel( struct xfs_mount *mp) { - int error; - - error = xlog_recover_cancel(mp->m_log); + xlog_recover_cancel(mp->m_log); xfs_log_unmount(mp); - - return error; } /* @@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) * Or, if we are doing a forced umount (typically because of IO errors). */ if (mp->m_flags & XFS_MOUNT_NORECOVERY || - xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { + xfs_readonly_buftarg(log->l_targ)) { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); return 0; } @@ -1244,53 +1202,49 @@ xlog_space_left( } -/* - * Log function which is called when an io completes. - * - * The log manager needs its own routine, in order to control what - * happens with the buffer after the write completes. - */ static void -xlog_iodone(xfs_buf_t *bp) +xlog_ioend_work( + struct work_struct *work) { - struct xlog_in_core *iclog = bp->b_log_item; - struct xlog *l = iclog->ic_log; - int aborted = 0; + struct xlog_in_core *iclog = + container_of(work, struct xlog_in_core, ic_end_io_work); + struct xlog *log = iclog->ic_log; + bool aborted = false; + int error; + + error = blk_status_to_errno(iclog->ic_bio.bi_status); +#ifdef DEBUG + /* treat writes with injected CRC errors as failed */ + if (iclog->ic_fail_crc) + error = -EIO; +#endif /* - * Race to shutdown the filesystem if we see an error or the iclog is in - * IOABORT state. The IOABORT state is only set in DEBUG mode to inject - * CRC errors into log recovery. + * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || - iclog->ic_state & XLOG_STATE_IOABORT) { - if (iclog->ic_state & XLOG_STATE_IOABORT) - iclog->ic_state &= ~XLOG_STATE_IOABORT; - - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_stale(bp); - xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); + if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) { + xfs_alert(log->l_mp, "log I/O error %d", error); + xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR); /* * This flag will be propagated to the trans-committed * callback routines to let them know that the log-commit * didn't succeed. */ - aborted = XFS_LI_ABORTED; + aborted = true; } else if (iclog->ic_state & XLOG_STATE_IOERROR) { - aborted = XFS_LI_ABORTED; + aborted = true; } - /* log I/O is always issued ASYNC */ - ASSERT(bp->b_flags & XBF_ASYNC); xlog_state_done_syncing(iclog, aborted); + bio_uninit(&iclog->ic_bio); /* - * drop the buffer lock now that we are done. Nothing references - * the buffer after this, so an unmount waiting on this lock can now - * tear it down safely. As such, it is unsafe to reference the buffer - * (bp) after the unlock as we could race with it being freed. + * Drop the lock to signal that we are done. Nothing references the + * iclog after this, so an unmount waiting on this lock can now tear it + * down safely. As such, it is unsafe to reference the iclog after the + * unlock as we could race with it being freed. */ - xfs_buf_unlock(bp); + up(&iclog->ic_sema); } /* @@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp) * If the filesystem blocksize is too large, we may need to choose a * larger size since the directory code currently logs entire blocks. */ - STATIC void xlog_get_iclog_buffer_size( struct xfs_mount *mp, struct xlog *log) { - int size; - int xhdrs; - if (mp->m_logbufs <= 0) - log->l_iclog_bufs = XLOG_MAX_ICLOGS; - else - log->l_iclog_bufs = mp->m_logbufs; + mp->m_logbufs = XLOG_MAX_ICLOGS; + if (mp->m_logbsize <= 0) + mp->m_logbsize = XLOG_BIG_RECORD_BSIZE; + + log->l_iclog_bufs = mp->m_logbufs; + log->l_iclog_size = mp->m_logbsize; /* - * Buffer size passed in from mount system call. + * # headers = size / 32k - one header holds cycles from 32k of data. */ - if (mp->m_logbsize > 0) { - size = log->l_iclog_size = mp->m_logbsize; - log->l_iclog_size_log = 0; - while (size != 1) { - log->l_iclog_size_log++; - size >>= 1; - } - - if (xfs_sb_version_haslogv2(&mp->m_sb)) { - /* # headers = size / 32k - * one header holds cycles from 32k of data - */ - - xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE; - if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE) - xhdrs++; - log->l_iclog_hsize = xhdrs << BBSHIFT; - log->l_iclog_heads = xhdrs; - } else { - ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE); - log->l_iclog_hsize = BBSIZE; - log->l_iclog_heads = 1; - } - goto done; - } - - /* All machines use 32kB buffers by default. */ - log->l_iclog_size = XLOG_BIG_RECORD_BSIZE; - log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT; - - /* the default log size is 16k or 32k which is one header sector */ - log->l_iclog_hsize = BBSIZE; - log->l_iclog_heads = 1; - -done: - /* are we being asked to make the sizes selected above visible? */ - if (mp->m_logbufs == 0) - mp->m_logbufs = log->l_iclog_bufs; - if (mp->m_logbsize == 0) - mp->m_logbsize = log->l_iclog_size; -} /* xlog_get_iclog_buffer_size */ - + log->l_iclog_heads = + DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE); + log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT; +} void xfs_log_work_queue( @@ -1422,7 +1337,6 @@ xlog_alloc_log( xlog_rec_header_t *head; xlog_in_core_t **iclogp; xlog_in_core_t *iclog, *prev_iclog=NULL; - xfs_buf_t *bp; int i; int error = -ENOMEM; uint log2_size = 0; @@ -1480,30 +1394,6 @@ xlog_alloc_log( xlog_get_iclog_buffer_size(mp, log); - /* - * Use a NULL block for the extra log buffer used during splits so that - * it will trigger errors if we ever try to do IO on it without first - * having set it up properly. - */ - error = -ENOMEM; - bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL, - BTOBB(log->l_iclog_size), XBF_NO_IOACCT); - if (!bp) - goto out_free_log; - - /* - * The iclogbuf buffer locks are held over IO but we are not going to do - * IO yet. Hence unlock the buffer so that the log IO path can grab it - * when appropriately. - */ - ASSERT(xfs_buf_islocked(bp)); - xfs_buf_unlock(bp); - - /* use high priority wq for log I/O completion */ - bp->b_ioend_wq = mp->m_log_workqueue; - bp->b_iodone = xlog_iodone; - log->l_xbuf = bp; - spin_lock_init(&log->l_icloglock); init_waitqueue_head(&log->l_flush_wait); @@ -1516,29 +1406,22 @@ xlog_alloc_log( * xlog_in_core_t in xfs_log_priv.h for details. */ ASSERT(log->l_iclog_size >= 4096); - for (i=0; i < log->l_iclog_bufs; i++) { - *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); - if (!*iclogp) + for (i = 0; i < log->l_iclog_bufs; i++) { + size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) * + sizeof(struct bio_vec); + + iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL); + if (!iclog) goto out_free_iclog; - iclog = *iclogp; + *iclogp = iclog; iclog->ic_prev = prev_iclog; prev_iclog = iclog; - bp = xfs_buf_get_uncached(mp->m_logdev_targp, - BTOBB(log->l_iclog_size), - XBF_NO_IOACCT); - if (!bp) + iclog->ic_data = kmem_alloc_large(log->l_iclog_size, + KM_MAYFAIL); + if (!iclog->ic_data) goto out_free_iclog; - - ASSERT(xfs_buf_islocked(bp)); - xfs_buf_unlock(bp); - - /* use high priority wq for log I/O completion */ - bp->b_ioend_wq = mp->m_log_workqueue; - bp->b_iodone = xlog_iodone; - iclog->ic_bp = bp; - iclog->ic_data = bp->b_addr; #ifdef DEBUG log->l_iclog_bak[i] = &iclog->ic_header; #endif @@ -1552,36 +1435,43 @@ xlog_alloc_log( head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); - iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; + iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_log = log; atomic_set(&iclog->ic_refcnt, 0); spin_lock_init(&iclog->ic_callback_lock); - iclog->ic_callback_tail = &(iclog->ic_callback); + INIT_LIST_HEAD(&iclog->ic_callbacks); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); + INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work); + sema_init(&iclog->ic_sema, 1); iclogp = &iclog->ic_next; } *iclogp = log->l_iclog; /* complete ring */ log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ + log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s", + WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0, + mp->m_fsname); + if (!log->l_ioend_workqueue) + goto out_free_iclog; + error = xlog_cil_init(log); if (error) - goto out_free_iclog; + goto out_destroy_workqueue; return log; +out_destroy_workqueue: + destroy_workqueue(log->l_ioend_workqueue); out_free_iclog: for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { prev_iclog = iclog->ic_next; - if (iclog->ic_bp) - xfs_buf_free(iclog->ic_bp); + kmem_free(iclog->ic_data); kmem_free(iclog); } - spinlock_destroy(&log->l_icloglock); - xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); out: @@ -1766,42 +1656,155 @@ xlog_cksum( return xfs_end_cksum(crc); } -/* - * The bdstrat callback function for log bufs. This gives us a central - * place to trap bufs in case we get hit by a log I/O error and need to - * shutdown. Actually, in practice, even when we didn't get a log error, - * we transition the iclogs to IOERROR state *after* flushing all existing - * iclogs to disk. This is because we don't want anymore new transactions to be - * started or completed afterwards. - * - * We lock the iclogbufs here so that we can serialise against IO completion - * during unmount. We might be processing a shutdown triggered during unmount, - * and that can occur asynchronously to the unmount thread, and hence we need to - * ensure that completes before tearing down the iclogbufs. Hence we need to - * hold the buffer lock across the log IO to acheive that. - */ -STATIC int -xlog_bdstrat( - struct xfs_buf *bp) +static void +xlog_bio_end_io( + struct bio *bio) { - struct xlog_in_core *iclog = bp->b_log_item; + struct xlog_in_core *iclog = bio->bi_private; - xfs_buf_lock(bp); - if (iclog->ic_state & XLOG_STATE_IOERROR) { - xfs_buf_ioerror(bp, -EIO); - xfs_buf_stale(bp); - xfs_buf_ioend(bp); + queue_work(iclog->ic_log->l_ioend_workqueue, + &iclog->ic_end_io_work); +} + +static void +xlog_map_iclog_data( + struct bio *bio, + void *data, + size_t count) +{ + do { + struct page *page = kmem_to_page(data); + unsigned int off = offset_in_page(data); + size_t len = min_t(size_t, count, PAGE_SIZE - off); + + WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len); + + data += len; + count -= len; + } while (count); +} + +STATIC void +xlog_write_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + uint64_t bno, + unsigned int count, + bool need_flush) +{ + ASSERT(bno < log->l_logBBsize); + + /* + * We lock the iclogbufs here so that we can serialise against I/O + * completion during unmount. We might be processing a shutdown + * triggered during unmount, and that can occur asynchronously to the + * unmount thread, and hence we need to ensure that completes before + * tearing down the iclogbufs. Hence we need to hold the buffer lock + * across the log IO to archieve that. + */ + down(&iclog->ic_sema); + if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) { /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of - * doing it here. Similarly, IO completion will unlock the - * buffer, so we don't do it here. + * doing it here. We kick of the state machine and unlock + * the buffer manually, the code needs to be kept in sync + * with the I/O completion path. */ - return 0; + xlog_state_done_syncing(iclog, XFS_LI_ABORTED); + up(&iclog->ic_sema); + return; } - xfs_buf_submit(bp); - return 0; + iclog->ic_io_size = count; + + bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE)); + bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev); + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno; + iclog->ic_bio.bi_end_io = xlog_bio_end_io; + iclog->ic_bio.bi_private = iclog; + iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA; + if (need_flush) + iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + + xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size); + if (is_vmalloc_addr(iclog->ic_data)) + flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size); + + /* + * If this log buffer would straddle the end of the log we will have + * to split it up into two bios, so that we can continue at the start. + */ + if (bno + BTOBB(count) > log->l_logBBsize) { + struct bio *split; + + split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno, + GFP_NOIO, &fs_bio_set); + bio_chain(split, &iclog->ic_bio); + submit_bio(split); + + /* restart at logical offset zero for the remainder */ + iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart; + } + + submit_bio(&iclog->ic_bio); +} + +/* + * We need to bump cycle number for the part of the iclog that is + * written to the start of the log. Watch out for the header magic + * number case, though. + */ +static void +xlog_split_iclog( + struct xlog *log, + void *data, + uint64_t bno, + unsigned int count) +{ + unsigned int split_offset = BBTOB(log->l_logBBsize - bno); + unsigned int i; + + for (i = split_offset; i < count; i += BBSIZE) { + uint32_t cycle = get_unaligned_be32(data + i); + + if (++cycle == XLOG_HEADER_MAGIC_NUM) + cycle++; + put_unaligned_be32(cycle, data + i); + } +} + +static int +xlog_calc_iclog_size( + struct xlog *log, + struct xlog_in_core *iclog, + uint32_t *roundoff) +{ + uint32_t count_init, count; + bool use_lsunit; + + use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) && + log->l_mp->m_sb.sb_logsunit > 1; + + /* Add for LR header */ + count_init = log->l_iclog_hsize + iclog->ic_offset; + + /* Round out the log write size */ + if (use_lsunit) { + /* we have a v2 stripe unit to use */ + count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); + } else { + count = BBTOB(BTOBB(count_init)); + } + + ASSERT(count >= count_init); + *roundoff = count - count_init; + + if (use_lsunit) + ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit); + else + ASSERT(*roundoff < BBTOB(1)); + return count; } /* @@ -1824,46 +1827,23 @@ xlog_bdstrat( * log will require grabbing the lock though. * * The entire log manager uses a logical block numbering scheme. Only - * log_sync (and then only bwrite()) know about the fact that the log may - * not start with block zero on a given device. The log block start offset - * is added immediately before calling bwrite(). + * xlog_write_iclog knows about the fact that the log may not start with + * block zero on a given device. */ - -STATIC int +STATIC void xlog_sync( struct xlog *log, struct xlog_in_core *iclog) { - xfs_buf_t *bp; - int i; - uint count; /* byte count of bwrite */ - uint count_init; /* initial count before roundup */ - int roundoff; /* roundoff to BB or stripe */ - int split = 0; /* split write into two regions */ - int error; - int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); - int size; + unsigned int count; /* byte count of bwrite */ + unsigned int roundoff; /* roundoff to BB or stripe */ + uint64_t bno; + unsigned int size; + bool need_flush = true, split = false; - XFS_STATS_INC(log->l_mp, xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); - /* Add for LR header */ - count_init = log->l_iclog_hsize + iclog->ic_offset; - - /* Round out the log write size */ - if (v2 && log->l_mp->m_sb.sb_logsunit > 1) { - /* we have a v2 stripe unit to use */ - count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init)); - } else { - count = BBTOB(BTOBB(count_init)); - } - roundoff = count - count_init; - ASSERT(roundoff >= 0); - ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 && - roundoff < log->l_mp->m_sb.sb_logsunit) - || - (log->l_mp->m_sb.sb_logsunit <= 1 && - roundoff < BBTOB(1))); + count = xlog_calc_iclog_size(log, iclog, &roundoff); /* move grant heads by roundoff in sync */ xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); @@ -1874,41 +1854,19 @@ xlog_sync( /* real byte length */ size = iclog->ic_offset; - if (v2) + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) size += roundoff; iclog->ic_header.h_len = cpu_to_be32(size); - bp = iclog->ic_bp; - XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); - + XFS_STATS_INC(log->l_mp, xs_log_writes); XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); - /* Do we need to split this write into 2 parts? */ - if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { - char *dptr; - - split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); - count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); - iclog->ic_bwritecnt = 2; + bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)); - /* - * Bump the cycle numbers at the start of each block in the - * part of the iclog that ends up in the buffer that gets - * written to the start of the log. - * - * Watch out for the header magic number case, though. - */ - dptr = (char *)&iclog->ic_header + count; - for (i = 0; i < split; i += BBSIZE) { - uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); - if (++cycle == XLOG_HEADER_MAGIC_NUM) - cycle++; - *(__be32 *)dptr = cpu_to_be32(cycle); - - dptr += BBSIZE; - } - } else { - iclog->ic_bwritecnt = 1; + /* Do we need to split this write into 2 parts? */ + if (bno + BTOBB(count) > log->l_logBBsize) { + xlog_split_iclog(log, &iclog->ic_header, bno, count); + split = true; } /* calculcate the checksum */ @@ -1921,18 +1879,15 @@ xlog_sync( * write on I/O completion and shutdown the fs. The subsequent mount * detects the bad CRC and attempts to recover. */ +#ifdef DEBUG if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); - iclog->ic_state |= XLOG_STATE_IOABORT; + iclog->ic_fail_crc = true; xfs_warn(log->l_mp, "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", be64_to_cpu(iclog->ic_header.h_lsn)); } - - bp->b_io_length = BTOBB(count); - bp->b_log_item = iclog; - bp->b_flags &= ~XBF_FLUSH; - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); +#endif /* * Flush the data device before flushing the log to make sure all meta @@ -1942,50 +1897,14 @@ xlog_sync( * synchronously here; for an internal log we can simply use the block * layer state machine for preflushes. */ - if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) + if (log->l_targ != log->l_mp->m_ddev_targp || split) { xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); - else - bp->b_flags |= XBF_FLUSH; - - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - - xlog_verify_iclog(log, iclog, count, true); - - /* account for log which doesn't start at block #0 */ - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); - - /* - * Don't call xfs_bwrite here. We do log-syncs even when the filesystem - * is shutting down. - */ - error = xlog_bdstrat(bp); - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_sync"); - return error; + need_flush = false; } - if (split) { - bp = iclog->ic_log->l_xbuf; - XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ - xfs_buf_associate_memory(bp, - (char *)&iclog->ic_header + count, split); - bp->b_log_item = iclog; - bp->b_flags &= ~XBF_FLUSH; - bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA); - - ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); - ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - - /* account for internal log which doesn't start at block #0 */ - XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); - error = xlog_bdstrat(bp); - if (error) { - xfs_buf_ioerror_alert(bp, "xlog_sync (split)"); - return error; - } - } - return 0; -} /* xlog_sync */ + + xlog_verify_iclog(log, iclog, count); + xlog_write_iclog(log, iclog, bno, count, need_flush); +} /* * Deallocate a log structure @@ -2005,31 +1924,21 @@ xlog_dealloc_log( */ iclog = log->l_iclog; for (i = 0; i < log->l_iclog_bufs; i++) { - xfs_buf_lock(iclog->ic_bp); - xfs_buf_unlock(iclog->ic_bp); + down(&iclog->ic_sema); + up(&iclog->ic_sema); iclog = iclog->ic_next; } - /* - * Always need to ensure that the extra buffer does not point to memory - * owned by another log buffer before we free it. Also, cycle the lock - * first to ensure we've completed IO on it. - */ - xfs_buf_lock(log->l_xbuf); - xfs_buf_unlock(log->l_xbuf); - xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); - xfs_buf_free(log->l_xbuf); - iclog = log->l_iclog; for (i = 0; i < log->l_iclog_bufs; i++) { - xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; + kmem_free(iclog->ic_data); kmem_free(iclog); iclog = next_iclog; } - spinlock_destroy(&log->l_icloglock); log->l_mp->m_log = NULL; + destroy_workqueue(log->l_ioend_workqueue); kmem_free(log); } /* xlog_dealloc_log */ @@ -2610,7 +2519,7 @@ xlog_state_clean_log( if (iclog->ic_state == XLOG_STATE_DIRTY) { iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_offset = 0; - ASSERT(iclog->ic_callback == NULL); + ASSERT(list_empty_careful(&iclog->ic_callbacks)); /* * If the number of ops in this iclog indicate it just * contains the dummy transaction, we can @@ -2680,37 +2589,32 @@ xlog_state_clean_log( STATIC xfs_lsn_t xlog_get_lowest_lsn( - struct xlog *log) + struct xlog *log) { - xlog_in_core_t *lsn_log; - xfs_lsn_t lowest_lsn, lsn; + struct xlog_in_core *iclog = log->l_iclog; + xfs_lsn_t lowest_lsn = 0, lsn; - lsn_log = log->l_iclog; - lowest_lsn = 0; do { - if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { - lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); - if ((lsn && !lowest_lsn) || - (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { + if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)) + continue; + + lsn = be64_to_cpu(iclog->ic_header.h_lsn); + if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0) lowest_lsn = lsn; - } - } - lsn_log = lsn_log->ic_next; - } while (lsn_log != log->l_iclog); + } while ((iclog = iclog->ic_next) != log->l_iclog); + return lowest_lsn; } - STATIC void xlog_state_do_callback( struct xlog *log, - int aborted, + bool aborted, struct xlog_in_core *ciclog) { xlog_in_core_t *iclog; xlog_in_core_t *first_iclog; /* used to know when we've * processed all iclogs once */ - xfs_log_callback_t *cb, *cb_next; int flushcnt = 0; xfs_lsn_t lowest_lsn; int ioerrors; /* counter: iclogs with errors */ @@ -2821,7 +2725,7 @@ xlog_state_do_callback( */ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); - if (iclog->ic_callback) + if (!list_empty_careful(&iclog->ic_callbacks)) atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(iclog->ic_header.h_lsn)); @@ -2838,26 +2742,20 @@ xlog_state_do_callback( * callbacks being added. */ spin_lock(&iclog->ic_callback_lock); - cb = iclog->ic_callback; - while (cb) { - iclog->ic_callback_tail = &(iclog->ic_callback); - iclog->ic_callback = NULL; - spin_unlock(&iclog->ic_callback_lock); + while (!list_empty(&iclog->ic_callbacks)) { + LIST_HEAD(tmp); - /* perform callbacks in the order given */ - for (; cb; cb = cb_next) { - cb_next = cb->cb_next; - cb->cb_func(cb->cb_arg, aborted); - } + list_splice_init(&iclog->ic_callbacks, &tmp); + + spin_unlock(&iclog->ic_callback_lock); + xlog_cil_process_committed(&tmp, aborted); spin_lock(&iclog->ic_callback_lock); - cb = iclog->ic_callback; } loopdidcallbacks++; funcdidcallbacks++; spin_lock(&log->l_icloglock); - ASSERT(iclog->ic_callback == NULL); spin_unlock(&iclog->ic_callback_lock); if (!(iclog->ic_state & XLOG_STATE_IOERROR)) iclog->ic_state = XLOG_STATE_DIRTY; @@ -2943,18 +2841,16 @@ xlog_state_do_callback( */ STATIC void xlog_state_done_syncing( - xlog_in_core_t *iclog, - int aborted) + struct xlog_in_core *iclog, + bool aborted) { - struct xlog *log = iclog->ic_log; + struct xlog *log = iclog->ic_log; spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); - ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); - /* * If we got an error, either on the first buffer, or in the case of @@ -2962,13 +2858,8 @@ xlog_state_done_syncing( * and none should ever be attempted to be written to disk * again. */ - if (iclog->ic_state != XLOG_STATE_IOERROR) { - if (--iclog->ic_bwritecnt == 1) { - spin_unlock(&log->l_icloglock); - return; - } + if (iclog->ic_state != XLOG_STATE_IOERROR) iclog->ic_state = XLOG_STATE_DONE_SYNC; - } /* * Someone could be sleeping prior to writing out the next @@ -3237,7 +3128,7 @@ xlog_state_release_iclog( * flags after this point. */ if (sync) - return xlog_sync(log, iclog); + xlog_sync(log, iclog); return 0; } /* xlog_state_release_iclog */ @@ -3828,8 +3719,7 @@ STATIC void xlog_verify_iclog( struct xlog *log, struct xlog_in_core *iclog, - int count, - bool syncing) + int count) { xlog_op_header_t *ophead; xlog_in_core_t *icptr; @@ -3873,7 +3763,7 @@ xlog_verify_iclog( /* clientid is only 1 byte */ p = &ophead->oh_clientid; field_offset = p - base_ptr; - if (!syncing || (field_offset & 0x1ff)) { + if (field_offset & 0x1ff) { clientid = ophead->oh_clientid; } else { idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); @@ -3896,7 +3786,7 @@ xlog_verify_iclog( /* check length */ p = &ophead->oh_len; field_offset = p - base_ptr; - if (!syncing || (field_offset & 0x1ff)) { + if (field_offset & 0x1ff) { op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((uintptr_t)&ophead->oh_len - @@ -4033,7 +3923,7 @@ xfs_log_force_umount( * avoid races. */ wake_up_all(&log->l_cilp->xc_commit_wait); - xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); + xlog_state_do_callback(log, true, NULL); #ifdef XFSERRORDEBUG { diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 73a64bf32f6f..84e06805160f 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -6,6 +6,8 @@ #ifndef __XFS_LOG_H__ #define __XFS_LOG_H__ +struct xfs_cil_ctx; + struct xfs_log_vec { struct xfs_log_vec *lv_next; /* next lv in build list */ int lv_niovecs; /* number of iovecs in lv */ @@ -72,16 +74,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, } /* - * Structure used to pass callback function and the function's argument - * to the log manager. - */ -typedef struct xfs_log_callback { - struct xfs_log_callback *cb_next; - void (*cb_func)(void *, int); - void *cb_arg; -} xfs_log_callback_t; - -/* * By comparing each component, we don't have to worry about extra * endian issues in treating two 32 bit numbers as one 64 bit number */ @@ -125,12 +117,10 @@ int xfs_log_mount(struct xfs_mount *mp, xfs_daddr_t start_block, int num_bblocks); int xfs_log_mount_finish(struct xfs_mount *mp); -int xfs_log_mount_cancel(struct xfs_mount *); +void xfs_log_mount_cancel(struct xfs_mount *); xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); void xfs_log_space_wake(struct xfs_mount *mp); -int xfs_log_notify(struct xlog_in_core *iclog, - struct xfs_log_callback *callback_entry); int xfs_log_release_iclog(struct xfs_mount *mp, struct xlog_in_core *iclog); int xfs_log_reserve(struct xfs_mount *mp, @@ -148,6 +138,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket); void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, xfs_lsn_t *commit_lsn, bool regrant); +void xlog_cil_process_committed(struct list_head *list, bool aborted); bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); void xfs_log_work_queue(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 5e595948bc5a..fa5602d0fd7f 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -10,10 +10,7 @@ #include "xfs_shared.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_alloc.h" #include "xfs_extent_busy.h" -#include "xfs_discard.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_log.h" @@ -246,7 +243,8 @@ xfs_cil_prepare_item( * shadow buffer, so update the the pointer to it appropriately. */ if (!old_lv) { - lv->lv_item->li_ops->iop_pin(lv->lv_item); + if (lv->lv_item->li_ops->iop_pin) + lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_lv_shadow = NULL; } else if (old_lv != lv) { ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); @@ -576,10 +574,9 @@ xlog_discard_busy_extents( */ static void xlog_cil_committed( - void *args, - int abort) + struct xfs_cil_ctx *ctx, + bool abort) { - struct xfs_cil_ctx *ctx = args; struct xfs_mount *mp = ctx->cil->xc_log->l_mp; /* @@ -614,6 +611,20 @@ xlog_cil_committed( kmem_free(ctx); } +void +xlog_cil_process_committed( + struct list_head *list, + bool aborted) +{ + struct xfs_cil_ctx *ctx; + + while ((ctx = list_first_entry_or_null(list, + struct xfs_cil_ctx, iclog_entry))) { + list_del(&ctx->iclog_entry); + xlog_cil_committed(ctx, aborted); + } +} + /* * Push the Committed Item List to the log. If @push_seq flag is zero, then it * is a background flush and so we can chose to ignore it. Otherwise, if the @@ -835,12 +846,15 @@ restart: if (commit_lsn == -1) goto out_abort; - /* attach all the transactions w/ busy extents to iclog */ - ctx->log_cb.cb_func = xlog_cil_committed; - ctx->log_cb.cb_arg = ctx; - error = xfs_log_notify(commit_iclog, &ctx->log_cb); - if (error) + spin_lock(&commit_iclog->ic_callback_lock); + if (commit_iclog->ic_state & XLOG_STATE_IOERROR) { + spin_unlock(&commit_iclog->ic_callback_lock); goto out_abort; + } + ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE || + commit_iclog->ic_state == XLOG_STATE_WANT_SYNC); + list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks); + spin_unlock(&commit_iclog->ic_callback_lock); /* * now the checkpoint commit is complete and we've attached the @@ -864,7 +878,7 @@ out_skip: out_abort_free_ticket: xfs_log_ticket_put(tic); out_abort: - xlog_cil_committed(ctx, XFS_LI_ABORTED); + xlog_cil_committed(ctx, true); return -EIO; } @@ -984,6 +998,7 @@ xfs_log_commit_cil( { struct xlog *log = mp->m_log; struct xfs_cil *cil = log->l_cilp; + struct xfs_log_item *lip, *next; xfs_lsn_t xc_commit_lsn; /* @@ -1008,7 +1023,7 @@ xfs_log_commit_cil( /* * Once all the items of the transaction have been copied to the CIL, - * the items can be unlocked and freed. + * the items can be unlocked and possibly freed. * * This needs to be done before we drop the CIL context lock because we * have to update state in the log items and unlock them before they go @@ -1017,8 +1032,12 @@ xfs_log_commit_cil( * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, xc_commit_lsn, false); - + trace_xfs_trans_commit_items(tp, _RET_IP_); + list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { + xfs_trans_del_item(lip); + if (lip->li_ops->iop_committing) + lip->li_ops->iop_committing(lip, xc_commit_lsn); + } xlog_cil_push_background(log); up_read(&cil->xc_ctx_lock); diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index b5f82cb36202..b880c23cb6e4 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -10,7 +10,6 @@ struct xfs_buf; struct xlog; struct xlog_ticket; struct xfs_mount; -struct xfs_log_callback; /* * Flags for log structure @@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i) #define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ #define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ #define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ -#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */ #define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ #define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ @@ -179,11 +177,10 @@ typedef struct xlog_ticket { * the iclog. * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. * - ic_next is the pointer to the next iclog in the ring. - * - ic_bp is a pointer to the buffer used to write this incore log to disk. * - ic_log is a pointer back to the global log structure. - * - ic_callback is a linked list of callback function/argument pairs to be - * called after an iclog finishes writing. - * - ic_size is the full size of the header plus data. + * - ic_size is the full size of the log buffer, minus the cycle headers. + * - ic_io_size is the size of the currently pending log buffer write, which + * might be smaller than ic_size * - ic_offset is the current number of bytes written to in this iclog. * - ic_refcnt is bumped when someone is writing to the log. * - ic_state is the state of the iclog. @@ -193,7 +190,7 @@ typedef struct xlog_ticket { * structure cacheline aligned. The following fields can be contended on * by independent processes: * - * - ic_callback_* + * - ic_callbacks * - ic_refcnt * - fields protected by the global l_icloglock * @@ -206,23 +203,28 @@ typedef struct xlog_in_core { wait_queue_head_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; - struct xfs_buf *ic_bp; struct xlog *ic_log; - int ic_size; - int ic_offset; - int ic_bwritecnt; + u32 ic_size; + u32 ic_io_size; + u32 ic_offset; unsigned short ic_state; char *ic_datap; /* pointer to iclog data */ /* Callback structures need their own cacheline */ spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; - struct xfs_log_callback *ic_callback; - struct xfs_log_callback **ic_callback_tail; + struct list_head ic_callbacks; /* reference counts need their own cacheline */ atomic_t ic_refcnt ____cacheline_aligned_in_smp; xlog_in_core_2_t *ic_data; #define ic_header ic_data->hic_header +#ifdef DEBUG + bool ic_fail_crc : 1; +#endif + struct semaphore ic_sema; + struct work_struct ic_end_io_work; + struct bio ic_bio; + struct bio_vec ic_bvec[]; } xlog_in_core_t; /* @@ -243,7 +245,7 @@ struct xfs_cil_ctx { int space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ struct xfs_log_vec *lv_chain; /* logvecs being pushed */ - struct xfs_log_callback log_cb; /* completion callback hook. */ + struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct discard_endio_work; }; @@ -350,9 +352,8 @@ struct xlog { struct xfs_mount *l_mp; /* mount point */ struct xfs_ail *l_ailp; /* AIL log is working with */ struct xfs_cil *l_cilp; /* CIL log is working with */ - struct xfs_buf *l_xbuf; /* extra buffer for log - * wrapping */ struct xfs_buftarg *l_targ; /* buftarg of log */ + struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */ struct delayed_work l_work; /* background flush work */ uint l_flags; uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ @@ -361,7 +362,6 @@ struct xlog { int l_iclog_heads; /* # of iclog header sectors */ uint l_sectBBsize; /* sector size in BBs (2^n) */ int l_iclog_size; /* size of log in bytes */ - int l_iclog_size_log; /* log power size of log */ int l_iclog_bufs; /* number of iclog buffers */ xfs_daddr_t l_logBBstart; /* start block of log */ int l_logsize; /* size of log in bytes */ @@ -418,7 +418,7 @@ xlog_recover( extern int xlog_recover_finish( struct xlog *log); -extern int +extern void xlog_recover_cancel(struct xlog *); extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9329f5adbfbe..13d1d3e95b88 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -13,8 +13,6 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_log.h" @@ -26,7 +24,6 @@ #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_quota.h" -#include "xfs_cksum.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_bmap_btree.h" @@ -79,7 +76,7 @@ struct xfs_buf_cancel { * are valid, false otherwise. */ static inline bool -xlog_verify_bp( +xlog_verify_bno( struct xlog *log, xfs_daddr_t blk_no, int bbcount) @@ -92,22 +89,19 @@ xlog_verify_bp( } /* - * Allocate a buffer to hold log data. The buffer needs to be able - * to map to a range of nbblks basic blocks at any valid (basic - * block) offset within the log. + * Allocate a buffer to hold log data. The buffer needs to be able to map to + * a range of nbblks basic blocks at any valid offset within the log. */ -STATIC xfs_buf_t * -xlog_get_bp( +static char * +xlog_alloc_buffer( struct xlog *log, int nbblks) { - struct xfs_buf *bp; - /* * Pass log block 0 since we don't have an addr yet, buffer will be * verified on read. */ - if (!xlog_verify_bp(log, 0, nbblks)) { + if (!xlog_verify_bno(log, 0, nbblks)) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); @@ -115,69 +109,48 @@ xlog_get_bp( } /* - * We do log I/O in units of log sectors (a power-of-2 - * multiple of the basic block size), so we round up the - * requested size to accommodate the basic blocks required - * for complete log sectors. + * We do log I/O in units of log sectors (a power-of-2 multiple of the + * basic block size), so we round up the requested size to accommodate + * the basic blocks required for complete log sectors. * - * In addition, the buffer may be used for a non-sector- - * aligned block offset, in which case an I/O of the - * requested size could extend beyond the end of the - * buffer. If the requested size is only 1 basic block it - * will never straddle a sector boundary, so this won't be - * an issue. Nor will this be a problem if the log I/O is - * done in basic blocks (sector size 1). But otherwise we - * extend the buffer by one extra log sector to ensure - * there's space to accommodate this possibility. + * In addition, the buffer may be used for a non-sector-aligned block + * offset, in which case an I/O of the requested size could extend + * beyond the end of the buffer. If the requested size is only 1 basic + * block it will never straddle a sector boundary, so this won't be an + * issue. Nor will this be a problem if the log I/O is done in basic + * blocks (sector size 1). But otherwise we extend the buffer by one + * extra log sector to ensure there's space to accommodate this + * possibility. */ if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); - - bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0); - if (bp) - xfs_buf_unlock(bp); - return bp; -} - -STATIC void -xlog_put_bp( - xfs_buf_t *bp) -{ - xfs_buf_free(bp); + return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL); } /* * Return the address of the start of the given block number's data * in a log buffer. The buffer covers a log sector-aligned region. */ -STATIC char * +static inline unsigned int xlog_align( struct xlog *log, - xfs_daddr_t blk_no, - int nbblks, - struct xfs_buf *bp) + xfs_daddr_t blk_no) { - xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); - - ASSERT(offset + nbblks <= bp->b_length); - return bp->b_addr + BBTOB(offset); + return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); } - -/* - * nbblks should be uint, but oh well. Just want to catch that 32-bit length. - */ -STATIC int -xlog_bread_noalign( - struct xlog *log, - xfs_daddr_t blk_no, - int nbblks, - struct xfs_buf *bp) +static int +xlog_do_io( + struct xlog *log, + xfs_daddr_t blk_no, + unsigned int nbblks, + char *data, + unsigned int op) { - int error; + int error; - if (!xlog_verify_bp(log, blk_no, nbblks)) { + if (!xlog_verify_bno(log, blk_no, nbblks)) { xfs_warn(log->l_mp, "Invalid log block/length (0x%llx, 0x%x) for buffer", blk_no, nbblks); @@ -187,107 +160,53 @@ xlog_bread_noalign( blk_no = round_down(blk_no, log->l_sectBBsize); nbblks = round_up(nbblks, log->l_sectBBsize); - ASSERT(nbblks > 0); - ASSERT(nbblks <= bp->b_length); - - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - bp->b_flags |= XBF_READ; - bp->b_io_length = nbblks; - bp->b_error = 0; - error = xfs_buf_submit(bp); - if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) - xfs_buf_ioerror_alert(bp, __func__); + error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, + BBTOB(nbblks), data, op); + if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) { + xfs_alert(log->l_mp, + "log recovery %s I/O error at daddr 0x%llx len %d error %d", + op == REQ_OP_WRITE ? "write" : "read", + blk_no, nbblks, error); + } return error; } STATIC int -xlog_bread( +xlog_bread_noalign( struct xlog *log, xfs_daddr_t blk_no, int nbblks, - struct xfs_buf *bp, - char **offset) + char *data) { - int error; - - error = xlog_bread_noalign(log, blk_no, nbblks, bp); - if (error) - return error; - - *offset = xlog_align(log, blk_no, nbblks, bp); - return 0; + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); } -/* - * Read at an offset into the buffer. Returns with the buffer in it's original - * state regardless of the result of the read. - */ STATIC int -xlog_bread_offset( +xlog_bread( struct xlog *log, - xfs_daddr_t blk_no, /* block to read from */ - int nbblks, /* blocks to read */ - struct xfs_buf *bp, - char *offset) + xfs_daddr_t blk_no, + int nbblks, + char *data, + char **offset) { - char *orig_offset = bp->b_addr; - int orig_len = BBTOB(bp->b_length); - int error, error2; - - error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks)); - if (error) - return error; - - error = xlog_bread_noalign(log, blk_no, nbblks, bp); + int error; - /* must reset buffer pointer even on error */ - error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); - if (error) - return error; - return error2; + error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); + if (!error) + *offset = data + xlog_align(log, blk_no); + return error; } -/* - * Write out the buffer at the given block for the given number of blocks. - * The buffer is kept locked across the write and is returned locked. - * This can only be used for synchronous log writes. - */ STATIC int xlog_bwrite( struct xlog *log, xfs_daddr_t blk_no, int nbblks, - struct xfs_buf *bp) + char *data) { - int error; - - if (!xlog_verify_bp(log, blk_no, nbblks)) { - xfs_warn(log->l_mp, - "Invalid log block/length (0x%llx, 0x%x) for buffer", - blk_no, nbblks); - XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); - return -EFSCORRUPTED; - } - - blk_no = round_down(blk_no, log->l_sectBBsize); - nbblks = round_up(nbblks, log->l_sectBBsize); - - ASSERT(nbblks > 0); - ASSERT(nbblks <= bp->b_length); - - XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no); - xfs_buf_hold(bp); - xfs_buf_lock(bp); - bp->b_io_length = nbblks; - bp->b_error = 0; - - error = xfs_bwrite(bp); - if (error) - xfs_buf_ioerror_alert(bp, __func__); - xfs_buf_relse(bp); - return error; + return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); } #ifdef DEBUG @@ -377,10 +296,9 @@ xlog_recover_iodone( * We're not going to bother about retrying * this during recovery. One strike! */ - if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buf_ioerror_alert(bp, __func__); - xfs_force_shutdown(bp->b_target->bt_mount, - SHUTDOWN_META_IO_ERROR); + xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); } } @@ -405,7 +323,7 @@ xlog_recover_iodone( STATIC int xlog_find_cycle_start( struct xlog *log, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle) @@ -419,7 +337,7 @@ xlog_find_cycle_start( end_blk = *last_blk; mid_blk = BLK_AVG(first_blk, end_blk); while (mid_blk != first_blk && mid_blk != end_blk) { - error = xlog_bread(log, mid_blk, 1, bp, &offset); + error = xlog_bread(log, mid_blk, 1, buffer, &offset); if (error) return error; mid_cycle = xlog_get_cycle(offset); @@ -455,7 +373,7 @@ xlog_find_verify_cycle( { xfs_daddr_t i, j; uint cycle; - xfs_buf_t *bp; + char *buffer; xfs_daddr_t bufblks; char *buf = NULL; int error = 0; @@ -469,7 +387,7 @@ xlog_find_verify_cycle( bufblks = 1 << ffs(nbblks); while (bufblks > log->l_logBBsize) bufblks >>= 1; - while (!(bp = xlog_get_bp(log, bufblks))) { + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < log->l_sectBBsize) return -ENOMEM; @@ -480,7 +398,7 @@ xlog_find_verify_cycle( bcount = min(bufblks, (start_blk + nbblks - i)); - error = xlog_bread(log, i, bcount, bp, &buf); + error = xlog_bread(log, i, bcount, buffer, &buf); if (error) goto out; @@ -498,7 +416,7 @@ xlog_find_verify_cycle( *new_blk = -1; out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -522,7 +440,7 @@ xlog_find_verify_log_record( int extra_bblks) { xfs_daddr_t i; - xfs_buf_t *bp; + char *buffer; char *offset = NULL; xlog_rec_header_t *head = NULL; int error = 0; @@ -532,12 +450,14 @@ xlog_find_verify_log_record( ASSERT(start_blk != 0 || *last_blk != start_blk); - if (!(bp = xlog_get_bp(log, num_blks))) { - if (!(bp = xlog_get_bp(log, 1))) + buffer = xlog_alloc_buffer(log, num_blks); + if (!buffer) { + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; smallmem = 1; } else { - error = xlog_bread(log, start_blk, num_blks, bp, &offset); + error = xlog_bread(log, start_blk, num_blks, buffer, &offset); if (error) goto out; offset += ((num_blks - 1) << BBSHIFT); @@ -554,7 +474,7 @@ xlog_find_verify_log_record( } if (smallmem) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out; } @@ -607,7 +527,7 @@ xlog_find_verify_log_record( *last_blk = i; out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -629,7 +549,7 @@ xlog_find_head( struct xlog *log, xfs_daddr_t *return_head_blk) { - xfs_buf_t *bp; + char *buffer; char *offset; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; int num_scan_bblks; @@ -659,20 +579,20 @@ xlog_find_head( } first_blk = 0; /* get cycle # of 1st block */ - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ - error = xlog_bread(log, last_blk, 1, bp, &offset); + error = xlog_bread(log, last_blk, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); @@ -740,9 +660,10 @@ xlog_find_head( * ^ we want to locate this spot */ stop_on_cycle = last_half_cycle; - if ((error = xlog_find_cycle_start(log, bp, first_blk, - &head_blk, last_half_cycle))) - goto bp_err; + error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, + last_half_cycle); + if (error) + goto out_free_buffer; } /* @@ -762,7 +683,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks, stop_on_cycle, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } else { /* need to read 2 parts of log */ @@ -799,7 +720,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks - (int)head_blk, (stop_on_cycle - 1), &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) { head_blk = new_blk; goto validate_head; @@ -815,7 +736,7 @@ xlog_find_head( if ((error = xlog_find_verify_cycle(log, start_blk, (int)head_blk, stop_on_cycle, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } @@ -834,13 +755,13 @@ validate_head: if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; } else { start_blk = 0; ASSERT(head_blk <= INT_MAX); error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); if (error < 0) - goto bp_err; + goto out_free_buffer; if (error == 1) { /* We hit the beginning of the log during our search */ start_blk = log_bbnum - (num_scan_bblks - head_blk); @@ -853,14 +774,14 @@ validate_head: if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; if (new_blk != log_bbnum) head_blk = new_blk; } else if (error) - goto bp_err; + goto out_free_buffer; } - xlog_put_bp(bp); + kmem_free(buffer); if (head_blk == log_bbnum) *return_head_blk = 0; else @@ -873,9 +794,8 @@ validate_head: */ return 0; - bp_err: - xlog_put_bp(bp); - +out_free_buffer: + kmem_free(buffer); if (error) xfs_warn(log->l_mp, "failed to find log head"); return error; @@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr( xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) @@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr( */ end_blk = head_blk > tail_blk ? tail_blk : 0; for (i = (int) head_blk - 1; i >= end_blk; i--) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr( */ if (tail_blk >= head_blk && found != count) { for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -969,7 +889,7 @@ xlog_seek_logrec_hdr( xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) @@ -988,7 +908,7 @@ xlog_seek_logrec_hdr( */ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; for (i = (int) tail_blk; i <= end_blk; i++) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr( */ if (tail_blk > head_blk && found != count) { for (i = 0; i < (int) head_blk; i++) { - error = xlog_bread(log, i, 1, bp, &offset); + error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; @@ -1069,22 +989,22 @@ xlog_verify_tail( int hsize) { struct xlog_rec_header *thead; - struct xfs_buf *bp; + char *buffer; xfs_daddr_t first_bad; int error = 0; bool wrapped; xfs_daddr_t tmp_tail; xfs_daddr_t orig_tail = *tail_blk; - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; /* * Make sure the tail points to a record (returns positive count on * success). */ - error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, + error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; @@ -1113,8 +1033,8 @@ xlog_verify_tail( break; /* skip to the next record; returns positive count on success */ - error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, - &tmp_tail, &thead, &wrapped); + error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, + buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; @@ -1129,7 +1049,7 @@ xlog_verify_tail( "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", orig_tail, *tail_blk); out: - xlog_put_bp(bp); + kmem_free(buffer); return error; } @@ -1151,13 +1071,13 @@ xlog_verify_head( struct xlog *log, xfs_daddr_t *head_blk, /* in/out: unverified head */ xfs_daddr_t *tail_blk, /* out: tail block */ - struct xfs_buf *bp, + char *buffer, xfs_daddr_t *rhead_blk, /* start blk of last record */ struct xlog_rec_header **rhead, /* ptr to last record */ bool *wrapped) /* last rec. wraps phys. log */ { struct xlog_rec_header *tmp_rhead; - struct xfs_buf *tmp_bp; + char *tmp_buffer; xfs_daddr_t first_bad; xfs_daddr_t tmp_rhead_blk; int found; @@ -1168,15 +1088,15 @@ xlog_verify_head( * Check the head of the log for torn writes. Search backwards from the * head until we hit the tail or the maximum number of log record I/Os * that could have been in flight at one time. Use a temporary buffer so - * we don't trash the rhead/bp pointers from the caller. + * we don't trash the rhead/buffer pointers from the caller. */ - tmp_bp = xlog_get_bp(log, 1); - if (!tmp_bp) + tmp_buffer = xlog_alloc_buffer(log, 1); + if (!tmp_buffer) return -ENOMEM; error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, - XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, - &tmp_rhead, &tmp_wrapped); - xlog_put_bp(tmp_bp); + XLOG_MAX_ICLOGS, tmp_buffer, + &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); + kmem_free(tmp_buffer); if (error < 0) return error; @@ -1205,8 +1125,8 @@ xlog_verify_head( * (i.e., the records with invalid CRC) if the cycle number * matches the the current cycle. */ - found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, - rhead_blk, rhead, wrapped); + found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, + buffer, rhead_blk, rhead, wrapped); if (found < 0) return found; if (found == 0) /* XXX: right thing to do here? */ @@ -1266,7 +1186,7 @@ xlog_check_unmount_rec( xfs_daddr_t *tail_blk, struct xlog_rec_header *rhead, xfs_daddr_t rhead_blk, - struct xfs_buf *bp, + char *buffer, bool *clean) { struct xlog_op_header *op_head; @@ -1309,7 +1229,7 @@ xlog_check_unmount_rec( if (*head_blk == after_umount_blk && be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); if (error) return error; @@ -1388,7 +1308,7 @@ xlog_find_tail( { xlog_rec_header_t *rhead; char *offset = NULL; - xfs_buf_t *bp; + char *buffer; int error; xfs_daddr_t rhead_blk; xfs_lsn_t tail_lsn; @@ -1402,11 +1322,11 @@ xlog_find_tail( return error; ASSERT(*head_blk < INT_MAX); - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; if (*head_blk == 0) { /* special case */ - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) goto done; @@ -1422,7 +1342,7 @@ xlog_find_tail( * block. This wraps all the way back around to the head so something is * seriously wrong if we can't find it. */ - error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, &rhead_blk, &rhead, &wrapped); if (error < 0) return error; @@ -1443,7 +1363,7 @@ xlog_find_tail( * state to determine whether recovery is necessary. */ error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, - rhead_blk, bp, &clean); + rhead_blk, buffer, &clean); if (error) goto done; @@ -1460,7 +1380,7 @@ xlog_find_tail( if (!clean) { xfs_daddr_t orig_head = *head_blk; - error = xlog_verify_head(log, head_blk, tail_blk, bp, + error = xlog_verify_head(log, head_blk, tail_blk, buffer, &rhead_blk, &rhead, &wrapped); if (error) goto done; @@ -1471,7 +1391,7 @@ xlog_find_tail( wrapped); tail_lsn = atomic64_read(&log->l_tail_lsn); error = xlog_check_unmount_rec(log, head_blk, tail_blk, - rhead, rhead_blk, bp, + rhead, rhead_blk, buffer, &clean); if (error) goto done; @@ -1505,11 +1425,11 @@ xlog_find_tail( * But... if the -device- itself is readonly, just skip this. * We can't recover this device anyway, so it won't matter. */ - if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) + if (!xfs_readonly_buftarg(log->l_targ)) error = xlog_clear_stale_blocks(log, tail_lsn); done: - xlog_put_bp(bp); + kmem_free(buffer); if (error) xfs_warn(log->l_mp, "failed to locate log tail"); @@ -1537,7 +1457,7 @@ xlog_find_zeroed( struct xlog *log, xfs_daddr_t *blk_no) { - xfs_buf_t *bp; + char *buffer; char *offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; @@ -1547,35 +1467,36 @@ xlog_find_zeroed( *blk_no = 0; /* check totally zeroed log */ - bp = xlog_get_bp(log, 1); - if (!bp) + buffer = xlog_alloc_buffer(log, 1); + if (!buffer) return -ENOMEM; - error = xlog_bread(log, 0, 1, bp, &offset); + error = xlog_bread(log, 0, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; - xlog_put_bp(bp); + kmem_free(buffer); return 1; } /* check partially zeroed log */ - error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); + error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); if (error) - goto bp_err; + goto out_free_buffer; last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ - xlog_put_bp(bp); + kmem_free(buffer); return 0; } /* we have a partially zeroed log */ last_blk = log_bbnum-1; - if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) - goto bp_err; + error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); + if (error) + goto out_free_buffer; /* * Validate the answer. Because there is no way to guarantee that @@ -1598,7 +1519,7 @@ xlog_find_zeroed( */ if ((error = xlog_find_verify_cycle(log, start_blk, (int)num_scan_bblks, 0, &new_blk))) - goto bp_err; + goto out_free_buffer; if (new_blk != -1) last_blk = new_blk; @@ -1610,11 +1531,11 @@ xlog_find_zeroed( if (error == 1) error = -EIO; if (error) - goto bp_err; + goto out_free_buffer; *blk_no = last_blk; -bp_err: - xlog_put_bp(bp); +out_free_buffer: + kmem_free(buffer); if (error) return error; return 1; @@ -1657,7 +1578,7 @@ xlog_write_log_records( int tail_block) { char *offset; - xfs_buf_t *bp; + char *buffer; int balign, ealign; int sectbb = log->l_sectBBsize; int end_block = start_block + blocks; @@ -1674,7 +1595,7 @@ xlog_write_log_records( bufblks = 1 << ffs(blocks); while (bufblks > log->l_logBBsize) bufblks >>= 1; - while (!(bp = xlog_get_bp(log, bufblks))) { + while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < sectbb) return -ENOMEM; @@ -1686,9 +1607,9 @@ xlog_write_log_records( */ balign = round_down(start_block, sectbb); if (balign != start_block) { - error = xlog_bread_noalign(log, start_block, 1, bp); + error = xlog_bread_noalign(log, start_block, 1, buffer); if (error) - goto out_put_bp; + goto out_free_buffer; j = start_block - balign; } @@ -1705,29 +1626,28 @@ xlog_write_log_records( */ ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { - offset = bp->b_addr + BBTOB(ealign - start_block); - error = xlog_bread_offset(log, ealign, sectbb, - bp, offset); + error = xlog_bread_noalign(log, ealign, sectbb, + buffer + BBTOB(ealign - start_block)); if (error) break; } - offset = xlog_align(log, start_block, endcount, bp); + offset = buffer + xlog_align(log, start_block); for (; j < endcount; j++) { xlog_add_record(log, offset, cycle, i+j, tail_cycle, tail_block); offset += BBSIZE; } - error = xlog_bwrite(log, start_block, endcount, bp); + error = xlog_bwrite(log, start_block, endcount, buffer); if (error) break; start_block += endcount; j = 0; } - out_put_bp: - xlog_put_bp(bp); +out_free_buffer: + kmem_free(buffer); return error; } @@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer( if (xfs_sb_version_hascrc(&mp->m_sb)) bp->b_ops = &xfs_inode_buf_ops; - inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; + inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog; for (i = 0; i < inodes_per_buf; i++) { next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + offsetof(xfs_dinode_t, di_next_unlinked); @@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer( ASSERT(item->ri_buf[item_index].i_addr != NULL); ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); - ASSERT((reg_buf_offset + reg_buf_bytes) <= - BBTOB(bp->b_io_length)); + ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length)); /* * The current logged region contains a copy of the @@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer( ASSERT(nbits > 0); ASSERT(item->ri_buf[i].i_addr != NULL); ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); - ASSERT(BBTOB(bp->b_io_length) >= + ASSERT(BBTOB(bp->b_length) >= ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); /* @@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2( * * Also make sure that only inode buffers with good sizes stay in * the buffer cache. The kernel moves inodes in buffers of 1 block - * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode + * or inode_cluster_size bytes, whichever is bigger. The inode * buffers in the log can be a different size if the log was generated * by an older kernel using unclustered inode buffers or a newer kernel * running with a different inode cluster size. Regardless, if the - * the inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) - * for *our* value of mp->m_inode_cluster_size, then we need to keep + * the inode buffer size isn't max(blocksize, inode_cluster_size) + * for *our* value of inode_cluster_size, then we need to keep * the buffer out of the buffer cache so that the buffer won't * overlap with future reads of those inodes. */ if (XFS_DINODE_MAGIC == be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && - (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, - (uint32_t)log->l_mp->m_inode_cluster_size))) { + (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) { xfs_buf_stale(bp); error = xfs_bwrite(bp); } else { - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); } @@ -3260,7 +3178,7 @@ out_owner_change: /* re-generate the checksum. */ xfs_dinode_calc_crc(log->l_mp, dip); - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); @@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2( } ASSERT(dq_f->qlf_size == 2); - ASSERT(bp->b_target->bt_mount == mp); + ASSERT(bp->b_mount == mp); bp->b_iodone = xlog_recover_iodone; xfs_buf_delwri_queue(bp, buffer_list); @@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2( { xfs_efd_log_format_t *efd_formatp; xfs_efi_log_item_t *efip = NULL; - xfs_log_item_t *lip; + struct xfs_log_item *lip; uint64_t efi_id; struct xfs_ail_cursor cur; struct xfs_ail *ailp = log->l_ailp; @@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2( { struct xfs_mount *mp = log->l_mp; struct xfs_icreate_log *icl; + struct xfs_ino_geometry *igeo = M_IGEO(mp); xfs_agnumber_t agno; xfs_agblock_t agbno; unsigned int count; @@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2( /* * The inode chunk is either full or sparse and we only support - * m_ialloc_min_blks sized sparse allocations at this time. + * m_ino_geo.ialloc_min_blks sized sparse allocations at this time. */ - if (length != mp->m_ialloc_blks && - length != mp->m_ialloc_min_blks) { + if (length != igeo->ialloc_blks && + length != igeo->ialloc_min_blks) { xfs_warn(log->l_mp, "%s: unsupported chunk length", __FUNCTION__); return -EINVAL; @@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2( * buffers for cancellation so we don't overwrite anything written after * a cancellation. */ - bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); - nbufs = length / mp->m_blocks_per_cluster; + bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); + nbufs = length / igeo->blocks_per_cluster; for (i = 0, cancel_count = 0; i < nbufs; i++) { xfs_daddr_t daddr; daddr = XFS_AGB_TO_DADDR(mp, agno, - agbno + i * mp->m_blocks_per_cluster); + agbno + i * igeo->blocks_per_cluster); if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) cancel_count++; } @@ -4956,12 +4875,11 @@ out: * A cancel occurs when the mount has failed and we're bailing out. * Release all pending log intent items so they don't pin the AIL. */ -STATIC int +STATIC void xlog_recover_cancel_intents( struct xlog *log) { struct xfs_log_item *lip; - int error = 0; struct xfs_ail_cursor cur; struct xfs_ail *ailp; @@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents( xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->ail_lock); - return error; } /* @@ -5307,7 +5224,7 @@ xlog_do_recovery_pass( xfs_daddr_t blk_no, rblk_no; xfs_daddr_t rhead_blk; char *offset; - xfs_buf_t *hbp, *dbp; + char *hbp, *dbp; int error = 0, h_size, h_len; int error2 = 0; int bblks, split_bblks; @@ -5332,7 +5249,7 @@ xlog_do_recovery_pass( * iclog header and extract the header size from it. Get a * new hbp that is the correct size. */ - hbp = xlog_get_bp(log, 1); + hbp = xlog_alloc_buffer(log, 1); if (!hbp) return -ENOMEM; @@ -5374,23 +5291,23 @@ xlog_do_recovery_pass( hblks = h_size / XLOG_HEADER_CYCLE_SIZE; if (h_size % XLOG_HEADER_CYCLE_SIZE) hblks++; - xlog_put_bp(hbp); - hbp = xlog_get_bp(log, hblks); + kmem_free(hbp); + hbp = xlog_alloc_buffer(log, hblks); } else { hblks = 1; } } else { ASSERT(log->l_sectBBsize == 1); hblks = 1; - hbp = xlog_get_bp(log, 1); + hbp = xlog_alloc_buffer(log, 1); h_size = XLOG_BIG_RECORD_BSIZE; } if (!hbp) return -ENOMEM; - dbp = xlog_get_bp(log, BTOBB(h_size)); + dbp = xlog_alloc_buffer(log, BTOBB(h_size)); if (!dbp) { - xlog_put_bp(hbp); + kmem_free(hbp); return -ENOMEM; } @@ -5405,7 +5322,7 @@ xlog_do_recovery_pass( /* * Check for header wrapping around physical end-of-log */ - offset = hbp->b_addr; + offset = hbp; split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { @@ -5441,8 +5358,8 @@ xlog_do_recovery_pass( * - order is important. */ wrapped_hblks = hblks - split_hblks; - error = xlog_bread_offset(log, 0, - wrapped_hblks, hbp, + error = xlog_bread_noalign(log, 0, + wrapped_hblks, offset + BBTOB(split_hblks)); if (error) goto bread_err2; @@ -5473,7 +5390,7 @@ xlog_do_recovery_pass( } else { /* This log record is split across the * physical end of log */ - offset = dbp->b_addr; + offset = dbp; split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical @@ -5502,8 +5419,8 @@ xlog_do_recovery_pass( * _first_, then the log start (LR header end) * - order is important. */ - error = xlog_bread_offset(log, 0, - bblks - split_bblks, dbp, + error = xlog_bread_noalign(log, 0, + bblks - split_bblks, offset + BBTOB(split_bblks)); if (error) goto bread_err2; @@ -5551,9 +5468,9 @@ xlog_do_recovery_pass( } bread_err2: - xlog_put_bp(dbp); + kmem_free(dbp); bread_err1: - xlog_put_bp(hbp); + kmem_free(hbp); /* * Submit buffers that have been added from the last record processed, @@ -5687,7 +5604,7 @@ xlog_do_recover( * Now that we've finished replaying all buffer and inode * updates, re-read in the superblock and reverify it. */ - bp = xfs_getsb(mp, 0); + bp = xfs_getsb(mp); bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); ASSERT(!(bp->b_flags & XBF_WRITE)); bp->b_flags |= XBF_READ; @@ -5860,16 +5777,12 @@ xlog_recover_finish( return 0; } -int +void xlog_recover_cancel( struct xlog *log) { - int error = 0; - if (log->l_flags & XLOG_RECOVERY_NEEDED) - error = xlog_recover_cancel_intents(log); - - return error; + xlog_recover_cancel_intents(log); } #if defined(DEBUG) diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 6b736ea58d35..9804efe525a9 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -6,8 +6,8 @@ #include "xfs.h" #include "xfs_fs.h" #include "xfs_error.h" +#include "xfs_shared.h" #include "xfs_format.h" -#include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6b2bfe81dc51..322da6909290 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -12,9 +12,6 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_dir2.h" #include "xfs_ialloc.h" @@ -27,7 +24,6 @@ #include "xfs_error.h" #include "xfs_quota.h" #include "xfs_fsops.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_sysfs.h" #include "xfs_rmap_btree.h" @@ -430,30 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp) } /* - * Set the maximum inode count for this filesystem - */ -STATIC void -xfs_set_maxicount(xfs_mount_t *mp) -{ - xfs_sb_t *sbp = &(mp->m_sb); - uint64_t icount; - - if (sbp->sb_imax_pct) { - /* - * Make sure the maximum inode count is a multiple - * of the units we allocate inodes in. - */ - icount = sbp->sb_dblocks * sbp->sb_imax_pct; - do_div(icount, 100); - do_div(icount, mp->m_ialloc_blks); - mp->m_maxicount = (icount * mp->m_ialloc_blks) << - sbp->sb_inopblog; - } else { - mp->m_maxicount = 0; - } -} - -/* * Set the default minimum read and write sizes unless * already specified in a mount option. * We use smaller I/O sizes when the file system @@ -509,29 +481,6 @@ xfs_set_low_space_thresholds( } } - -/* - * Set whether we're using inode alignment. - */ -STATIC void -xfs_set_inoalignment(xfs_mount_t *mp) -{ - if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) - mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; - else - mp->m_inoalign_mask = 0; - /* - * If we are using stripe alignment, check whether - * the stripe unit is a multiple of the inode alignment - */ - if (mp->m_dalign && mp->m_inoalign_mask && - !(mp->m_dalign & mp->m_inoalign_mask)) - mp->m_sinoalign = mp->m_dalign; - else - mp->m_sinoalign = 0; -} - /* * Check that the data (and log if separate) is an ok size. */ @@ -683,6 +632,7 @@ xfs_mountfs( { struct xfs_sb *sbp = &(mp->m_sb); struct xfs_inode *rip; + struct xfs_ino_geometry *igeo = M_IGEO(mp); uint64_t resblks; uint quotamount = 0; uint quotaflags = 0; @@ -749,12 +699,10 @@ xfs_mountfs( xfs_alloc_compute_maxlevels(mp); xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); - xfs_ialloc_compute_maxlevels(mp); + xfs_ialloc_setup_geometry(mp); xfs_rmapbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp); - xfs_set_maxicount(mp); - /* enable fail_at_unmount as default */ mp->m_fail_unmount = true; @@ -788,50 +736,22 @@ xfs_mountfs( xfs_set_low_space_thresholds(mp); /* - * Set the inode cluster size. - * This may still be overridden by the file system - * block size if it is larger than the chosen cluster size. - * - * For v5 filesystems, scale the cluster size with the inode size to - * keep a constant ratio of inode per cluster buffer, but only if mkfs - * has set the inode alignment value appropriately for larger cluster - * sizes. - */ - mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; - if (xfs_sb_version_hascrc(&mp->m_sb)) { - int new_size = mp->m_inode_cluster_size; - - new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; - if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) - mp->m_inode_cluster_size = new_size; - } - mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp); - mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster); - mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp); - mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align); - - /* * If enabled, sparse inode chunk alignment is expected to match the * cluster size. Full inode chunk alignment must match the chunk size, * but that is checked on sb read verification... */ if (xfs_sb_version_hassparseinodes(&mp->m_sb) && mp->m_sb.sb_spino_align != - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) { xfs_warn(mp, "Sparse inode block alignment (%u) must match cluster size (%llu).", mp->m_sb.sb_spino_align, - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); + XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)); error = -EINVAL; goto out_remove_uuid; } /* - * Set inode alignment fields - */ - xfs_set_inoalignment(mp); - - /* * Check that the data (and log if separate) is an ok size. */ error = xfs_check_sizes(mp); @@ -1385,24 +1305,14 @@ xfs_mod_frextents( * xfs_getsb() is called to obtain the buffer for the superblock. * The buffer is returned locked and read in from disk. * The buffer should be released with a call to xfs_brelse(). - * - * If the flags parameter is BUF_TRYLOCK, then we'll only return - * the superblock buffer if it can be locked without sleeping. - * If it can't then we'll return NULL. */ struct xfs_buf * xfs_getsb( - struct xfs_mount *mp, - int flags) + struct xfs_mount *mp) { struct xfs_buf *bp = mp->m_sb_bp; - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) - return NULL; - xfs_buf_lock(bp); - } - + xfs_buf_lock(bp); xfs_buf_hold(bp); ASSERT(bp->b_flags & XBF_DONE); return bp; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c81a5cd7c228..4adb6837439a 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -105,6 +105,7 @@ typedef struct xfs_mount { struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ struct xlog *m_log; /* log specific stuff */ + struct xfs_ino_geometry m_ino_geo; /* inode geometry */ int m_logbufs; /* number of log buffers */ int m_logbsize; /* size of each log buffer */ uint m_rsumlevels; /* rt summary levels */ @@ -126,12 +127,6 @@ typedef struct xfs_mount { uint8_t m_blkbit_log; /* blocklog + NBBY */ uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ uint8_t m_agno_log; /* log #ag's */ - uint8_t m_agino_log; /* #bits for agino in inum */ - uint m_inode_cluster_size;/* min inode buf size */ - unsigned int m_inodes_per_cluster; - unsigned int m_blocks_per_cluster; - unsigned int m_cluster_align; - unsigned int m_cluster_align_inodes; uint m_blockmask; /* sb_blocksize-1 */ uint m_blockwsize; /* sb_blocksize in words */ uint m_blockwmask; /* blockwsize-1 */ @@ -139,15 +134,12 @@ typedef struct xfs_mount { uint m_alloc_mnr[2]; /* min alloc btree records */ uint m_bmap_dmxr[2]; /* max bmap btree records */ uint m_bmap_dmnr[2]; /* min bmap btree records */ - uint m_inobt_mxr[2]; /* max inobt btree records */ - uint m_inobt_mnr[2]; /* min inobt btree records */ uint m_rmap_mxr[2]; /* max rmap btree records */ uint m_rmap_mnr[2]; /* min rmap btree records */ uint m_refc_mxr[2]; /* max refc btree records */ uint m_refc_mnr[2]; /* min refc btree records */ uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ - uint m_in_maxlevels; /* max inobt btree levels. */ uint m_rmap_maxlevels; /* max rmap btree levels */ uint m_refc_maxlevels; /* max refcount btree level */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ @@ -159,20 +151,13 @@ typedef struct xfs_mount { int m_fixedfsid[2]; /* unchanged for life of FS */ uint64_t m_flags; /* global mount flags */ bool m_finobt_nores; /* no per-AG finobt resv. */ - int m_ialloc_inos; /* inodes in inode allocation */ - int m_ialloc_blks; /* blocks in inode allocation */ - int m_ialloc_min_blks;/* min blocks in sparse inode - * allocation */ - int m_inoalign_mask;/* mask sb_inoalignmt if used */ uint m_qflags; /* quota status flags */ struct xfs_trans_resv m_resv; /* precomputed res values */ - uint64_t m_maxicount; /* maximum inode count */ uint64_t m_resblks; /* total reserved blocks */ uint64_t m_resblks_avail;/* available reserved blocks */ uint64_t m_resblks_save; /* reserved blks @ remount,ro */ int m_dalign; /* stripe unit */ int m_swidth; /* stripe width */ - int m_sinoalign; /* stripe unit inode alignment */ uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ @@ -198,7 +183,6 @@ typedef struct xfs_mount { struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_cil_workqueue; struct workqueue_struct *m_reclaim_workqueue; - struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; struct workqueue_struct *m_sync_workqueue; @@ -226,6 +210,8 @@ typedef struct xfs_mount { #endif } xfs_mount_t; +#define M_IGEO(mp) (&(mp)->m_ino_geo) + /* * Flags for m_flags. */ @@ -465,7 +451,7 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved); extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); -extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); +extern struct xfs_buf *xfs_getsb(xfs_mount_t *); extern int xfs_readsb(xfs_mount_t *, int); extern void xfs_freesb(xfs_mount_t *); extern bool xfs_fs_writable(struct xfs_mount *mp, int level); diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h index c8ba98fae30a..b6701b4f59a9 100644 --- a/fs/xfs/xfs_ondisk.h +++ b/fs/xfs/xfs_ondisk.h @@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void) XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); + + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); } #endif /* __XFS_ONDISK_H */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index bde2c9f56a46..0c954cad7449 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -2,23 +2,16 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ -#include <linux/iomap.h> #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_log.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_iomap.h" -#include "xfs_shared.h" -#include "xfs_bit.h" -#include "xfs_pnfs.h" /* * Ensure that we do not have any outstanding pNFS layouts that can be used by diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c new file mode 100644 index 000000000000..4bcc3e61056c --- /dev/null +++ b/fs/xfs/xfs_pwork.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_trace.h" +#include "xfs_sysctl.h" +#include "xfs_pwork.h" +#include <linux/nmi.h> + +/* + * Parallel Work Queue + * =================== + * + * Abstract away the details of running a large and "obviously" parallelizable + * task across multiple CPUs. Callers initialize the pwork control object with + * a desired level of parallelization and a work function. Next, they embed + * struct xfs_pwork in whatever structure they use to pass work context to a + * worker thread and queue that pwork. The work function will be passed the + * pwork item when it is run (from process context) and any returned error will + * be recorded in xfs_pwork_ctl.error. Work functions should check for errors + * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not + * stop workqueue item processing. + * + * This is the rough equivalent of the xfsprogs workqueue code, though we can't + * reuse that name here. + */ + +/* Invoke our caller's function. */ +static void +xfs_pwork_work( + struct work_struct *work) +{ + struct xfs_pwork *pwork; + struct xfs_pwork_ctl *pctl; + int error; + + pwork = container_of(work, struct xfs_pwork, work); + pctl = pwork->pctl; + error = pctl->work_fn(pctl->mp, pwork); + if (error && !pctl->error) + pctl->error = error; + if (atomic_dec_and_test(&pctl->nr_work)) + wake_up(&pctl->poll_wait); +} + +/* + * Set up control data for parallel work. @work_fn is the function that will + * be called. @tag will be written into the kernel threads. @nr_threads is + * the level of parallelism desired, or 0 for no limit. + */ +int +xfs_pwork_init( + struct xfs_mount *mp, + struct xfs_pwork_ctl *pctl, + xfs_pwork_work_fn work_fn, + const char *tag, + unsigned int nr_threads) +{ +#ifdef DEBUG + if (xfs_globals.pwork_threads >= 0) + nr_threads = xfs_globals.pwork_threads; +#endif + trace_xfs_pwork_init(mp, nr_threads, current->pid); + + pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag, + current->pid); + if (!pctl->wq) + return -ENOMEM; + pctl->work_fn = work_fn; + pctl->error = 0; + pctl->mp = mp; + atomic_set(&pctl->nr_work, 0); + init_waitqueue_head(&pctl->poll_wait); + + return 0; +} + +/* Queue some parallel work. */ +void +xfs_pwork_queue( + struct xfs_pwork_ctl *pctl, + struct xfs_pwork *pwork) +{ + INIT_WORK(&pwork->work, xfs_pwork_work); + pwork->pctl = pctl; + atomic_inc(&pctl->nr_work); + queue_work(pctl->wq, &pwork->work); +} + +/* Wait for the work to finish and tear down the control structure. */ +int +xfs_pwork_destroy( + struct xfs_pwork_ctl *pctl) +{ + destroy_workqueue(pctl->wq); + pctl->wq = NULL; + return pctl->error; +} + +/* + * Wait for the work to finish by polling completion status and touch the soft + * lockup watchdog. This is for callers such as mount which hold locks. + */ +void +xfs_pwork_poll( + struct xfs_pwork_ctl *pctl) +{ + while (wait_event_timeout(pctl->poll_wait, + atomic_read(&pctl->nr_work) == 0, HZ) == 0) + touch_softlockup_watchdog(); +} + +/* + * Return the amount of parallelism that the data device can handle, or 0 for + * no limit. + */ +unsigned int +xfs_pwork_guess_datadev_parallelism( + struct xfs_mount *mp) +{ + struct xfs_buftarg *btp = mp->m_ddev_targp; + + /* + * For now we'll go with the most conservative setting possible, + * which is two threads for an SSD and 1 thread everywhere else. + */ + return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1; +} diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h new file mode 100644 index 000000000000..8133124cf3bb --- /dev/null +++ b/fs/xfs/xfs_pwork.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#ifndef __XFS_PWORK_H__ +#define __XFS_PWORK_H__ + +struct xfs_pwork; +struct xfs_mount; + +typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork); + +/* + * Parallel work coordination structure. + */ +struct xfs_pwork_ctl { + struct workqueue_struct *wq; + struct xfs_mount *mp; + xfs_pwork_work_fn work_fn; + struct wait_queue_head poll_wait; + atomic_t nr_work; + int error; +}; + +/* + * Embed this parallel work control item inside your own work structure, + * then queue work with it. + */ +struct xfs_pwork { + struct work_struct work; + struct xfs_pwork_ctl *pctl; +}; + +#define XFS_PWORK_SINGLE_THREADED { .pctl = NULL } + +/* Have we been told to abort? */ +static inline bool +xfs_pwork_ctl_want_abort( + struct xfs_pwork_ctl *pctl) +{ + return pctl && pctl->error; +} + +/* Have we been told to abort? */ +static inline bool +xfs_pwork_want_abort( + struct xfs_pwork *pwork) +{ + return xfs_pwork_ctl_want_abort(pwork->pctl); +} + +int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, + xfs_pwork_work_fn work_fn, const char *tag, + unsigned int nr_threads); +void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork); +int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl); +void xfs_pwork_poll(struct xfs_pwork_ctl *pctl); +unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp); + +#endif /* __XFS_PWORK_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index aa6b6db3db0e..5e7a37f0cf84 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -13,19 +13,15 @@ #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" +#include "xfs_iwalk.h" #include "xfs_quota.h" -#include "xfs_error.h" #include "xfs_bmap.h" -#include "xfs_bmap_btree.h" #include "xfs_bmap_util.h" #include "xfs_trans.h" #include "xfs_trans_space.h" #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_cksum.h" /* * The global quota manager. There is only one of these for the entire @@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust( /* ARGSUSED */ STATIC int xfs_qm_dqusage_adjust( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* not used */ - int ubsize, /* not used */ - int *ubused, /* not used */ - int *res) /* result code value */ + struct xfs_mount *mp, + struct xfs_trans *tp, + xfs_ino_t ino, + void *data) { - xfs_inode_t *ip; - xfs_qcnt_t nblks; - xfs_filblks_t rtblks = 0; /* total rt blks */ - int error; + struct xfs_inode *ip; + xfs_qcnt_t nblks; + xfs_filblks_t rtblks = 0; /* total rt blks */ + int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust( * rootino must have its resources accounted for, not so with the quota * inodes. */ - if (xfs_is_quota_inode(&mp->m_sb, ino)) { - *res = BULKSTAT_RV_NOTHING; - return -EINVAL; - } + if (xfs_is_quota_inode(&mp->m_sb, ino)) + return 0; /* * We don't _need_ to take the ilock EXCL here because quotacheck runs * at mount time and therefore nobody will be racing chown/chproj. */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip); - if (error) { - *res = BULKSTAT_RV_NOTHING; + error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip); + if (error == -EINVAL || error == -ENOENT) + return 0; + if (error) return error; - } ASSERT(ip->i_delayed_blks == 0); @@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); if (!(ifp->if_flags & XFS_IFEXTENTS)) { - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); if (error) goto error0; } @@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust( goto error0; } - xfs_irele(ip); - *res = BULKSTAT_RV_DIDONE; - return 0; - error0: xfs_irele(ip); - *res = BULKSTAT_RV_GIVEUP; return error; } @@ -1270,18 +1257,13 @@ STATIC int xfs_qm_quotacheck( xfs_mount_t *mp) { - int done, count, error, error2; - xfs_ino_t lastino; - size_t structsz; + int error, error2; uint flags; LIST_HEAD (buffer_list); struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; - count = INT_MAX; - structsz = 1; - lastino = 0; flags = 0; ASSERT(uip || gip || pip); @@ -1318,18 +1300,10 @@ xfs_qm_quotacheck( flags |= XFS_PQUOTA_CHKD; } - do { - /* - * Iterate thru all the inodes in the file system, - * adjusting the corresponding dquot counters in core. - */ - error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, - structsz, NULL, &done); - if (error) - break; - - } while (!done); + error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true, + NULL); + if (error) + goto error_return; /* * We've made all the changes that we need to make incore. Flush them diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 3091e4bc04ef..5d72e88598b4 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -5,13 +5,13 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_qm.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index b3190890f096..da7ad0383037 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ -#include <linux/capability.h> #include "xfs.h" #include "xfs_fs.h" @@ -12,17 +11,13 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" -#include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_qm.h" -#include "xfs_trace.h" #include "xfs_icache.h" -#include "xfs_defer.h" STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index a7c0c657dfaf..cd6c7210a373 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -11,10 +12,8 @@ #include "xfs_inode.h" #include "xfs_quota.h" #include "xfs_trans.h" -#include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_qm.h" -#include <linux/quota.h> static void diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index fce38b56b962..d8288aa0670a 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -14,7 +14,6 @@ #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_refcount_item.h" #include "xfs_log.h" #include "xfs_refcount.h" @@ -95,15 +94,6 @@ xfs_cui_item_format( } /* - * Pinning has no meaning for an cui item, so just return. - */ -STATIC void -xfs_cui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an CUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the CUI transaction has been successfully committed to make it @@ -122,71 +112,22 @@ xfs_cui_item_unpin( } /* - * CUI items have no locking or pushing. However, since CUIs are pulled from - * the AIL when their corresponding CUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the CUI out of - * the AIL. - */ -STATIC uint -xfs_cui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The CUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an CUD isn't going to be * constructed and thus we free the CUI here directly. */ STATIC void -xfs_cui_item_unlock( +xfs_cui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_cui_release(CUI_ITEM(lip)); + xfs_cui_release(CUI_ITEM(lip)); } -/* - * The CUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_cui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The CUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_cui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all cui log items. - */ static const struct xfs_item_ops xfs_cui_item_ops = { .iop_size = xfs_cui_item_size, .iop_format = xfs_cui_item_format, - .iop_pin = xfs_cui_item_pin, .iop_unpin = xfs_cui_item_unpin, - .iop_unlock = xfs_cui_item_unlock, - .iop_committed = xfs_cui_item_committed, - .iop_push = xfs_cui_item_push, - .iop_committing = xfs_cui_item_committing, + .iop_release = xfs_cui_item_release, }; /* @@ -254,126 +195,250 @@ xfs_cud_item_format( } /* - * Pinning has no meaning for an cud item, so just return. + * The CUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the CUI and free the + * CUD. */ STATIC void -xfs_cud_item_pin( +xfs_cud_item_release( struct xfs_log_item *lip) { + struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + + xfs_cui_release(cudp->cud_cuip); + kmem_zone_free(xfs_cud_zone, cudp); } -/* - * Since pinning has no meaning for an cud item, unpinning does - * not either. - */ -STATIC void -xfs_cud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_cud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_cud_item_size, + .iop_format = xfs_cud_item_format, + .iop_release = xfs_cud_item_release, +}; + +static struct xfs_cud_log_item * +xfs_trans_get_cud( + struct xfs_trans *tp, + struct xfs_cui_log_item *cuip) { + struct xfs_cud_log_item *cudp; + + cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD, + &xfs_cud_item_ops); + cudp->cud_cuip = cuip; + cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; + + xfs_trans_add_item(tp, &cudp->cud_item); + return cudp; } /* - * There isn't much you can do to push on an cud item. It is simply stuck - * waiting for the log to be flushed to disk. + * Finish an refcount update and log it to the CUD. Note that the + * transaction is marked dirty regardless of whether the refcount + * update succeeds or fails to support the CUI/CUD lifecycle rules. */ -STATIC uint -xfs_cud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +static int +xfs_trans_log_finish_refcount_update( + struct xfs_trans *tp, + struct xfs_cud_log_item *cudp, + enum xfs_refcount_intent_type type, + xfs_fsblock_t startblock, + xfs_extlen_t blockcount, + xfs_fsblock_t *new_fsb, + xfs_extlen_t *new_len, + struct xfs_btree_cur **pcur) { - return XFS_ITEM_PINNED; + int error; + + error = xfs_refcount_finish_one(tp, type, startblock, + blockcount, new_fsb, new_len, pcur); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the CUI and frees the CUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); + + return error; } -/* - * The CUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the CUI and free the - * CUD. - */ -STATIC void -xfs_cud_item_unlock( - struct xfs_log_item *lip) +/* Sort refcount intents by AG. */ +static int +xfs_refcount_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_refcount_intent *ra; + struct xfs_refcount_intent *rb; + + ra = container_of(a, struct xfs_refcount_intent, ri_list); + rb = container_of(b, struct xfs_refcount_intent, ri_list); + return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - + XFS_FSB_TO_AGNO(mp, rb->ri_startblock); +} - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_cui_release(cudp->cud_cuip); - kmem_zone_free(xfs_cud_zone, cudp); +/* Get an CUI. */ +STATIC void * +xfs_refcount_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_cui_log_item *cuip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + cuip = xfs_cui_init(tp->t_mountp, count); + ASSERT(cuip != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &cuip->cui_item); + return cuip; +} + +/* Set the phys extent flags for this reverse mapping. */ +static void +xfs_trans_set_refcount_flags( + struct xfs_phys_extent *refc, + enum xfs_refcount_intent_type type) +{ + refc->pe_flags = 0; + switch (type) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + refc->pe_flags |= type; + break; + default: + ASSERT(0); } } -/* - * When the cud item is committed to disk, all we need to do is delete our - * reference to our partner cui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_cud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Log refcount updates in the intent item. */ +STATIC void +xfs_refcount_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { - struct xfs_cud_log_item *cudp = CUD_ITEM(lip); + struct xfs_cui_log_item *cuip = intent; + struct xfs_refcount_intent *refc; + uint next_extent; + struct xfs_phys_extent *ext; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); /* - * Drop the CUI reference regardless of whether the CUD has been - * aborted. Once the CUD transaction is constructed, it is the sole - * responsibility of the CUD to release the CUI (even if the CUI is - * aborted due to log I/O error). + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. */ - xfs_cui_release(cudp->cud_cuip); - kmem_zone_free(xfs_cud_zone, cudp); + next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; + ASSERT(next_extent < cuip->cui_format.cui_nextents); + ext = &cuip->cui_format.cui_extents[next_extent]; + ext->pe_startblock = refc->ri_startblock; + ext->pe_len = refc->ri_blockcount; + xfs_trans_set_refcount_flags(ext, refc->ri_type); +} - return (xfs_lsn_t)-1; +/* Get an CUD so we can process all the deferred refcount updates. */ +STATIC void * +xfs_refcount_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_cud(tp, intent); } -/* - * The CUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_cud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Process a deferred refcount update. */ +STATIC int +xfs_refcount_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) { + struct xfs_refcount_intent *refc; + xfs_fsblock_t new_fsb; + xfs_extlen_t new_aglen; + int error; + + refc = container_of(item, struct xfs_refcount_intent, ri_list); + error = xfs_trans_log_finish_refcount_update(tp, done_item, + refc->ri_type, + refc->ri_startblock, + refc->ri_blockcount, + &new_fsb, &new_aglen, + (struct xfs_btree_cur **)state); + /* Did we run out of reservation? Requeue what we didn't finish. */ + if (!error && new_aglen > 0) { + ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || + refc->ri_type == XFS_REFCOUNT_DECREASE); + refc->ri_startblock = new_fsb; + refc->ri_blockcount = new_aglen; + return -EAGAIN; + } + kmem_free(refc); + return error; } -/* - * This is the ops vector shared by all cud log items. - */ -static const struct xfs_item_ops xfs_cud_item_ops = { - .iop_size = xfs_cud_item_size, - .iop_format = xfs_cud_item_format, - .iop_pin = xfs_cud_item_pin, - .iop_unpin = xfs_cud_item_unpin, - .iop_unlock = xfs_cud_item_unlock, - .iop_committed = xfs_cud_item_committed, - .iop_push = xfs_cud_item_push, - .iop_committing = xfs_cud_item_committing, -}; +/* Clean up after processing deferred refcounts. */ +STATIC void +xfs_refcount_update_finish_cleanup( + struct xfs_trans *tp, + void *state, + int error) +{ + struct xfs_btree_cur *rcur = state; -/* - * Allocate and initialize an cud item with the given number of extents. - */ -struct xfs_cud_log_item * -xfs_cud_init( - struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + xfs_refcount_finish_one_cleanup(tp, rcur, error); +} +/* Abort all pending CUIs. */ +STATIC void +xfs_refcount_update_abort_intent( + void *intent) { - struct xfs_cud_log_item *cudp; + xfs_cui_release(intent); +} - cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); - xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); - cudp->cud_cuip = cuip; - cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; +/* Cancel a deferred refcount update. */ +STATIC void +xfs_refcount_update_cancel_item( + struct list_head *item) +{ + struct xfs_refcount_intent *refc; - return cudp; + refc = container_of(item, struct xfs_refcount_intent, ri_list); + kmem_free(refc); } +const struct xfs_defer_op_type xfs_refcount_update_defer_type = { + .max_items = XFS_CUI_MAX_FAST_EXTENTS, + .diff_items = xfs_refcount_update_diff_items, + .create_intent = xfs_refcount_update_create_intent, + .abort_intent = xfs_refcount_update_abort_intent, + .log_item = xfs_refcount_update_log_item, + .create_done = xfs_refcount_update_create_done, + .finish_item = xfs_refcount_update_finish_item, + .finish_cleanup = xfs_refcount_update_finish_cleanup, + .cancel_item = xfs_refcount_update_cancel_item, +}; + /* * Process a refcount update intent item that was recovered from the log. * We need to update the refcountbt. diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 3896dcc2368f..e47530f30489 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_cui_zone; extern struct kmem_zone *xfs_cud_zone; struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); -struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, - struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 680ae7662a78..c4ec7afd1170 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -11,21 +11,12 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" #include "xfs_inode.h" #include "xfs_trans.h" -#include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" -#include "xfs_error.h" -#include "xfs_dir2.h" -#include "xfs_dir2_priv.h" -#include "xfs_ioctl.h" #include "xfs_trace.h" -#include "xfs_log.h" #include "xfs_icache.h" -#include "xfs_pnfs.h" #include "xfs_btree.h" #include "xfs_refcount_btree.h" #include "xfs_refcount.h" @@ -33,11 +24,9 @@ #include "xfs_trans_space.h" #include "xfs_bit.h" #include "xfs_alloc.h" -#include "xfs_quota_defs.h" #include "xfs_quota.h" #include "xfs_reflink.h" #include "xfs_iomap.h" -#include "xfs_rmap_btree.h" #include "xfs_sb.h" #include "xfs_ag_resv.h" @@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range( /* Start a rolling transaction to remove the mappings */ error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, - 0, 0, XFS_TRANS_NOFS, &tp); + 0, 0, 0, &tp); if (error) goto out; @@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent( resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, - XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 127dc9c32a54..77ed557b6127 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -14,7 +14,6 @@ #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" -#include "xfs_buf_item.h" #include "xfs_rmap_item.h" #include "xfs_log.h" #include "xfs_rmap.h" @@ -94,15 +93,6 @@ xfs_rui_item_format( } /* - * Pinning has no meaning for an rui item, so just return. - */ -STATIC void -xfs_rui_item_pin( - struct xfs_log_item *lip) -{ -} - -/* * The unpin operation is the last place an RUI is manipulated in the log. It is * either inserted in the AIL or aborted in the event of a log I/O error. In * either case, the RUI transaction has been successfully committed to make it @@ -121,71 +111,22 @@ xfs_rui_item_unpin( } /* - * RUI items have no locking or pushing. However, since RUIs are pulled from - * the AIL when their corresponding RUDs are committed to disk, their situation - * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller - * will eventually flush the log. This should help in getting the RUI out of - * the AIL. - */ -STATIC uint -xfs_rui_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) -{ - return XFS_ITEM_PINNED; -} - -/* * The RUI has been either committed or aborted if the transaction has been * cancelled. If the transaction was cancelled, an RUD isn't going to be * constructed and thus we free the RUI here directly. */ STATIC void -xfs_rui_item_unlock( +xfs_rui_item_release( struct xfs_log_item *lip) { - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) - xfs_rui_release(RUI_ITEM(lip)); + xfs_rui_release(RUI_ITEM(lip)); } -/* - * The RUI is logged only once and cannot be moved in the log, so simply return - * the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_rui_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * The RUI dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ -STATIC void -xfs_rui_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector shared by all rui log items. - */ static const struct xfs_item_ops xfs_rui_item_ops = { .iop_size = xfs_rui_item_size, .iop_format = xfs_rui_item_format, - .iop_pin = xfs_rui_item_pin, .iop_unpin = xfs_rui_item_unpin, - .iop_unlock = xfs_rui_item_unlock, - .iop_committed = xfs_rui_item_committed, - .iop_push = xfs_rui_item_push, - .iop_committing = xfs_rui_item_committing, + .iop_release = xfs_rui_item_release, }; /* @@ -275,126 +216,271 @@ xfs_rud_item_format( } /* - * Pinning has no meaning for an rud item, so just return. + * The RUD is either committed or aborted if the transaction is cancelled. If + * the transaction is cancelled, drop our reference to the RUI and free the + * RUD. */ STATIC void -xfs_rud_item_pin( +xfs_rud_item_release( struct xfs_log_item *lip) { + struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + + xfs_rui_release(rudp->rud_ruip); + kmem_zone_free(xfs_rud_zone, rudp); } -/* - * Since pinning has no meaning for an rud item, unpinning does - * not either. - */ -STATIC void -xfs_rud_item_unpin( - struct xfs_log_item *lip, - int remove) +static const struct xfs_item_ops xfs_rud_item_ops = { + .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, + .iop_size = xfs_rud_item_size, + .iop_format = xfs_rud_item_format, + .iop_release = xfs_rud_item_release, +}; + +static struct xfs_rud_log_item * +xfs_trans_get_rud( + struct xfs_trans *tp, + struct xfs_rui_log_item *ruip) { + struct xfs_rud_log_item *rudp; + + rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); + xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD, + &xfs_rud_item_ops); + rudp->rud_ruip = ruip; + rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; + + xfs_trans_add_item(tp, &rudp->rud_item); + return rudp; } -/* - * There isn't much you can do to push on an rud item. It is simply stuck - * waiting for the log to be flushed to disk. - */ -STATIC uint -xfs_rud_item_push( - struct xfs_log_item *lip, - struct list_head *buffer_list) +/* Set the map extent flags for this reverse mapping. */ +static void +xfs_trans_set_rmap_flags( + struct xfs_map_extent *rmap, + enum xfs_rmap_intent_type type, + int whichfork, + xfs_exntst_t state) { - return XFS_ITEM_PINNED; + rmap->me_flags = 0; + if (state == XFS_EXT_UNWRITTEN) + rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; + if (whichfork == XFS_ATTR_FORK) + rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; + switch (type) { + case XFS_RMAP_MAP: + rmap->me_flags |= XFS_RMAP_EXTENT_MAP; + break; + case XFS_RMAP_MAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; + break; + case XFS_RMAP_UNMAP: + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; + break; + case XFS_RMAP_UNMAP_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; + break; + case XFS_RMAP_CONVERT: + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; + break; + case XFS_RMAP_CONVERT_SHARED: + rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; + break; + case XFS_RMAP_ALLOC: + rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; + break; + case XFS_RMAP_FREE: + rmap->me_flags |= XFS_RMAP_EXTENT_FREE; + break; + default: + ASSERT(0); + } } /* - * The RUD is either committed or aborted if the transaction is cancelled. If - * the transaction is cancelled, drop our reference to the RUI and free the - * RUD. + * Finish an rmap update and log it to the RUD. Note that the transaction is + * marked dirty regardless of whether the rmap update succeeds or fails to + * support the RUI/RUD lifecycle rules. */ -STATIC void -xfs_rud_item_unlock( - struct xfs_log_item *lip) +static int +xfs_trans_log_finish_rmap_update( + struct xfs_trans *tp, + struct xfs_rud_log_item *rudp, + enum xfs_rmap_intent_type type, + uint64_t owner, + int whichfork, + xfs_fileoff_t startoff, + xfs_fsblock_t startblock, + xfs_filblks_t blockcount, + xfs_exntst_t state, + struct xfs_btree_cur **pcur) { - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + int error; - if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { - xfs_rui_release(rudp->rud_ruip); - kmem_zone_free(xfs_rud_zone, rudp); - } + error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, + startblock, blockcount, state, pcur); + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the RUI and frees the RUD + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); + + return error; } -/* - * When the rud item is committed to disk, all we need to do is delete our - * reference to our partner rui item and then free ourselves. Since we're - * freeing ourselves we must return -1 to keep the transaction code from - * further referencing this item. - */ -STATIC xfs_lsn_t -xfs_rud_item_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +/* Sort rmap intents by AG. */ +static int +xfs_rmap_update_diff_items( + void *priv, + struct list_head *a, + struct list_head *b) { - struct xfs_rud_log_item *rudp = RUD_ITEM(lip); + struct xfs_mount *mp = priv; + struct xfs_rmap_intent *ra; + struct xfs_rmap_intent *rb; + + ra = container_of(a, struct xfs_rmap_intent, ri_list); + rb = container_of(b, struct xfs_rmap_intent, ri_list); + return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - + XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); +} + +/* Get an RUI. */ +STATIC void * +xfs_rmap_update_create_intent( + struct xfs_trans *tp, + unsigned int count) +{ + struct xfs_rui_log_item *ruip; + + ASSERT(tp != NULL); + ASSERT(count > 0); + + ruip = xfs_rui_init(tp->t_mountp, count); + ASSERT(ruip != NULL); /* - * Drop the RUI reference regardless of whether the RUD has been - * aborted. Once the RUD transaction is constructed, it is the sole - * responsibility of the RUD to release the RUI (even if the RUI is - * aborted due to log I/O error). + * Get a log_item_desc to point at the new item. */ - xfs_rui_release(rudp->rud_ruip); - kmem_zone_free(xfs_rud_zone, rudp); - - return (xfs_lsn_t)-1; + xfs_trans_add_item(tp, &ruip->rui_item); + return ruip; } -/* - * The RUD dependency tracking op doesn't do squat. It can't because - * it doesn't know where the free extent is coming from. The dependency - * tracking has to be handled by the "enclosing" metadata object. For - * example, for inodes, the inode is locked throughout the extent freeing - * so the dependency should be recorded there. - */ +/* Log rmap updates in the intent item. */ STATIC void -xfs_rud_item_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) +xfs_rmap_update_log_item( + struct xfs_trans *tp, + void *intent, + struct list_head *item) { + struct xfs_rui_log_item *ruip = intent; + struct xfs_rmap_intent *rmap; + uint next_extent; + struct xfs_map_extent *map; + + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); + + /* + * atomic_inc_return gives us the value after the increment; + * we want to use it as an array index so we need to subtract 1 from + * it. + */ + next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; + ASSERT(next_extent < ruip->rui_format.rui_nextents); + map = &ruip->rui_format.rui_extents[next_extent]; + map->me_owner = rmap->ri_owner; + map->me_startblock = rmap->ri_bmap.br_startblock; + map->me_startoff = rmap->ri_bmap.br_startoff; + map->me_len = rmap->ri_bmap.br_blockcount; + xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, + rmap->ri_bmap.br_state); } -/* - * This is the ops vector shared by all rud log items. - */ -static const struct xfs_item_ops xfs_rud_item_ops = { - .iop_size = xfs_rud_item_size, - .iop_format = xfs_rud_item_format, - .iop_pin = xfs_rud_item_pin, - .iop_unpin = xfs_rud_item_unpin, - .iop_unlock = xfs_rud_item_unlock, - .iop_committed = xfs_rud_item_committed, - .iop_push = xfs_rud_item_push, - .iop_committing = xfs_rud_item_committing, -}; +/* Get an RUD so we can process all the deferred rmap updates. */ +STATIC void * +xfs_rmap_update_create_done( + struct xfs_trans *tp, + void *intent, + unsigned int count) +{ + return xfs_trans_get_rud(tp, intent); +} -/* - * Allocate and initialize an rud item with the given number of extents. - */ -struct xfs_rud_log_item * -xfs_rud_init( - struct xfs_mount *mp, - struct xfs_rui_log_item *ruip) +/* Process a deferred rmap update. */ +STATIC int +xfs_rmap_update_finish_item( + struct xfs_trans *tp, + struct list_head *item, + void *done_item, + void **state) +{ + struct xfs_rmap_intent *rmap; + int error; + + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + error = xfs_trans_log_finish_rmap_update(tp, done_item, + rmap->ri_type, + rmap->ri_owner, rmap->ri_whichfork, + rmap->ri_bmap.br_startoff, + rmap->ri_bmap.br_startblock, + rmap->ri_bmap.br_blockcount, + rmap->ri_bmap.br_state, + (struct xfs_btree_cur **)state); + kmem_free(rmap); + return error; +} + +/* Clean up after processing deferred rmaps. */ +STATIC void +xfs_rmap_update_finish_cleanup( + struct xfs_trans *tp, + void *state, + int error) +{ + struct xfs_btree_cur *rcur = state; + + xfs_rmap_finish_one_cleanup(tp, rcur, error); +} +/* Abort all pending RUIs. */ +STATIC void +xfs_rmap_update_abort_intent( + void *intent) { - struct xfs_rud_log_item *rudp; + xfs_rui_release(intent); +} - rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); - xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); - rudp->rud_ruip = ruip; - rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; +/* Cancel a deferred rmap update. */ +STATIC void +xfs_rmap_update_cancel_item( + struct list_head *item) +{ + struct xfs_rmap_intent *rmap; - return rudp; + rmap = container_of(item, struct xfs_rmap_intent, ri_list); + kmem_free(rmap); } +const struct xfs_defer_op_type xfs_rmap_update_defer_type = { + .max_items = XFS_RUI_MAX_FAST_EXTENTS, + .diff_items = xfs_rmap_update_diff_items, + .create_intent = xfs_rmap_update_create_intent, + .abort_intent = xfs_rmap_update_abort_intent, + .log_item = xfs_rmap_update_log_item, + .create_done = xfs_rmap_update_create_done, + .finish_item = xfs_rmap_update_finish_item, + .finish_cleanup = xfs_rmap_update_finish_cleanup, + .cancel_item = xfs_rmap_update_cancel_item, +}; + /* * Process an rmap update intent item that was recovered from the log. * We need to update the rmapbt. diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h index 7e482baa27f5..8708e4a5aa5c 100644 --- a/fs/xfs/xfs_rmap_item.h +++ b/fs/xfs/xfs_rmap_item.h @@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_rui_zone; extern struct kmem_zone *xfs_rud_zone; struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); -struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *, - struct xfs_rui_log_item *); int xfs_rui_copy_format(struct xfs_log_iovec *buf, struct xfs_rui_log_format *dst_rui_fmt); void xfs_rui_item_free(struct xfs_rui_log_item *); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ac0fcdad0c4e..5fa4db3c3e32 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -11,17 +11,11 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_bmap.h" -#include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_space.h" -#include "xfs_trace.h" -#include "xfs_buf.h" #include "xfs_icache.h" #include "xfs_rtalloc.h" diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index cc509743facd..113883c4f202 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -4,7 +4,6 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/proc_fs.h> struct xstats xfsstats; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index a14d11d78bd8..f9450235533c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -11,18 +11,15 @@ #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" -#include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_alloc.h" -#include "xfs_error.h" #include "xfs_fsops.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" @@ -38,18 +35,8 @@ #include "xfs_refcount_item.h" #include "xfs_bmap_item.h" #include "xfs_reflink.h" -#include "xfs_defer.h" -#include <linux/namei.h> -#include <linux/dax.h> -#include <linux/init.h> -#include <linux/slab.h> #include <linux/magic.h> -#include <linux/mount.h> -#include <linux/mempool.h> -#include <linux/writeback.h> -#include <linux/kthread.h> -#include <linux/freezer.h> #include <linux/parser.h> static const struct super_operations xfs_super_operations; @@ -582,7 +569,7 @@ xfs_set_inode_alloc( * Calculate how much should be reserved for inodes to meet * the max inode percentage. Used only for inode32. */ - if (mp->m_maxicount) { + if (M_IGEO(mp)->maxicount) { uint64_t icount; icount = sbp->sb_dblocks * sbp->sb_imax_pct; @@ -840,16 +827,10 @@ xfs_init_mount_workqueues( if (!mp->m_reclaim_workqueue) goto out_destroy_cil; - mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0, - mp->m_fsname); - if (!mp->m_log_workqueue) - goto out_destroy_reclaim; - mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); if (!mp->m_eofblocks_workqueue) - goto out_destroy_log; + goto out_destroy_reclaim; mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, mp->m_fsname); @@ -860,8 +841,6 @@ xfs_init_mount_workqueues( out_destroy_eofb: destroy_workqueue(mp->m_eofblocks_workqueue); -out_destroy_log: - destroy_workqueue(mp->m_log_workqueue); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); out_destroy_cil: @@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues( { destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); - destroy_workqueue(mp->m_log_workqueue); destroy_workqueue(mp->m_reclaim_workqueue); destroy_workqueue(mp->m_cil_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); @@ -1131,10 +1109,10 @@ xfs_fs_statfs( fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) + if (M_IGEO(mp)->maxicount) statp->f_files = min_t(typeof(statp->f_files), statp->f_files, - mp->m_maxicount); + M_IGEO(mp)->maxicount); /* If sb_icount overshot maxicount, report actual allocation */ statp->f_files = max_t(typeof(statp->f_files), @@ -1685,6 +1663,8 @@ xfs_fs_fill_super( sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; + sb->s_iflags |= SB_I_CGROUPWB; + set_posix_acl_flag(sb); /* version 5 superblocks support inode version counters. */ diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 21cb49a43d7c..763e43d22dee 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -38,6 +38,18 @@ extern void xfs_qm_exit(void); # define XFS_SCRUB_STRING #endif +#ifdef CONFIG_XFS_ONLINE_REPAIR +# define XFS_REPAIR_STRING "repair, " +#else +# define XFS_REPAIR_STRING +#endif + +#ifdef CONFIG_XFS_WARN +# define XFS_WARN_STRING "verbose warnings, " +#else +# define XFS_WARN_STRING +#endif + #ifdef DEBUG # define XFS_DBG_STRING "debug" #else @@ -49,6 +61,8 @@ extern void xfs_qm_exit(void); XFS_SECURITY_STRING \ XFS_REALTIME_STRING \ XFS_SCRUB_STRING \ + XFS_REPAIR_STRING \ + XFS_WARN_STRING \ XFS_DBG_STRING /* DBG must be last */ struct xfs_inode; diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index b2c1177c717f..ed66fd2de327 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -12,23 +12,14 @@ #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_mount.h" -#include "xfs_da_format.h" -#include "xfs_da_btree.h" -#include "xfs_defer.h" #include "xfs_dir2.h" #include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_bmap_btree.h" -#include "xfs_bmap_util.h" -#include "xfs_error.h" #include "xfs_quota.h" #include "xfs_trans_space.h" #include "xfs_trace.h" -#include "xfs_symlink.h" #include "xfs_trans.h" -#include "xfs_log.h" /* ----- Kernel only functions below ----- */ int diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 0cc034dfb786..31b3bdbd2eba 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -4,10 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" -#include <linux/sysctl.h> -#include <linux/proc_fs.h> #include "xfs_error.h" -#include "xfs_stats.h" static struct ctl_table_header *xfs_table_header; diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h index ad7f9be13087..8abf4640f1d5 100644 --- a/fs/xfs/xfs_sysctl.h +++ b/fs/xfs/xfs_sysctl.h @@ -82,6 +82,9 @@ enum { extern xfs_param_t xfs_params; struct xfs_globals { +#ifdef DEBUG + int pwork_threads; /* parallel workqueue threads */ +#endif int log_recovery_delay; /* log recovery delay (secs) */ int mount_delay; /* mount setup delay (secs) */ bool bug_on_assert; /* BUG() the kernel on assert failure */ diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index cabda13f3c64..ddd0bf7a4740 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -10,9 +10,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sysfs.h" -#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_stats.h" #include "xfs_mount.h" struct xfs_sysfs_attr { @@ -206,11 +204,51 @@ always_cow_show( } XFS_SYSFS_ATTR_RW(always_cow); +#ifdef DEBUG +/* + * Override how many threads the parallel work queue is allowed to create. + * This has to be a debug-only global (instead of an errortag) because one of + * the main users of parallel workqueues is mount time quotacheck. + */ +STATIC ssize_t +pwork_threads_store( + struct kobject *kobject, + const char *buf, + size_t count) +{ + int ret; + int val; + + ret = kstrtoint(buf, 0, &val); + if (ret) + return ret; + + if (val < -1 || val > num_possible_cpus()) + return -EINVAL; + + xfs_globals.pwork_threads = val; + + return count; +} + +STATIC ssize_t +pwork_threads_show( + struct kobject *kobject, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads); +} +XFS_SYSFS_ATTR_RW(pwork_threads); +#endif /* DEBUG */ + static struct attribute *xfs_dbg_attrs[] = { ATTR_LIST(bug_on_assert), ATTR_LIST(log_recovery_delay), ATTR_LIST(mount_delay), ATTR_LIST(always_cow), +#ifdef DEBUG + ATTR_LIST(pwork_threads), +#endif NULL, }; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index cb6489c22cad..bc85b89f88ca 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -15,24 +15,16 @@ #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_da_btree.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" #include "xfs_alloc.h" #include "xfs_bmap.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" #include "xfs_trans.h" -#include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_buf_item.h" #include "xfs_quota.h" -#include "xfs_iomap.h" -#include "xfs_aops.h" #include "xfs_dquot_item.h" #include "xfs_dquot.h" #include "xfs_log_recover.h" -#include "xfs_inode_item.h" -#include "xfs_bmap_btree.h" #include "xfs_filestream.h" #include "xfs_fsmap.h" diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 2464ea351f83..8094b1920eef 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); @@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup); DEFINE_TRANS_EVENT(xfs_trans_free); DEFINE_TRANS_EVENT(xfs_trans_roll); DEFINE_TRANS_EVENT(xfs_trans_add_item); +DEFINE_TRANS_EVENT(xfs_trans_commit_items); DEFINE_TRANS_EVENT(xfs_trans_free_items); TRACE_EVENT(xfs_iunlink_update_bucket, @@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name, \ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); +TRACE_EVENT(xfs_iwalk_ag, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agino_t startino), + TP_ARGS(mp, agno, startino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, startino) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startino = startino; + ), + TP_printk("dev %d:%d agno %d startino %u", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __entry->startino) +) + +TRACE_EVENT(xfs_iwalk_ag_rec, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + struct xfs_inobt_rec_incore *irec), + TP_ARGS(mp, agno, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agino_t, startino) + __field(uint64_t, freemask) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->startino = irec->ir_startino; + __entry->freemask = irec->ir_free; + ), + TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, + __entry->startino, __entry->freemask) +) + +TRACE_EVENT(xfs_pwork_init, + TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid), + TP_ARGS(mp, nr_threads, pid), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, nr_threads) + __field(pid_t, pid) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->nr_threads = nr_threads; + __entry->pid = pid; + ), + TP_printk("dev %d:%d nr_threads %u pid %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->nr_threads, __entry->pid) +) + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 912b42f5fe4a..d42a68d8313b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -11,7 +11,6 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_extent_busy.h" #include "xfs_quota.h" #include "xfs_trans.h" @@ -264,9 +263,7 @@ xfs_trans_alloc( * GFP_NOFS allocation context so that we avoid lockdep false positives * by doing GFP_KERNEL allocations inside sb_start_intwrite(). */ - tp = kmem_zone_zalloc(xfs_trans_zone, - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); - + tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); @@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas( xfs_buf_t *bp; int whole = 0; - bp = xfs_trans_getsb(tp, tp->t_mountp, 0); + bp = xfs_trans_getsb(tp, tp->t_mountp); sbp = XFS_BUF_TO_SBP(bp); /* @@ -767,10 +764,9 @@ xfs_trans_del_item( } /* Detach and unlock all of the items in a transaction */ -void +static void xfs_trans_free_items( struct xfs_trans *tp, - xfs_lsn_t commit_lsn, bool abort) { struct xfs_log_item *lip, *next; @@ -779,11 +775,10 @@ xfs_trans_free_items( list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { xfs_trans_del_item(lip); - if (commit_lsn != NULLCOMMITLSN) - lip->li_ops->iop_committing(lip, commit_lsn); if (abort) set_bit(XFS_LI_ABORTED, &lip->li_flags); - lip->li_ops->iop_unlock(lip); + if (lip->li_ops->iop_release) + lip->li_ops->iop_release(lip); } } @@ -804,7 +799,8 @@ xfs_log_item_batch_insert( for (i = 0; i < nr_items; i++) { struct xfs_log_item *lip = log_items[i]; - lip->li_ops->iop_unpin(lip, 0); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 0); } } @@ -815,7 +811,7 @@ xfs_log_item_batch_insert( * * If we are called with the aborted flag set, it is because a log write during * a CIL checkpoint commit has failed. In this case, all the items in the - * checkpoint have already gone through iop_commited and iop_unlock, which + * checkpoint have already gone through iop_committed and iop_committing, which * means that checkpoint commit abort handling is treated exactly the same * as an iclog write error even though we haven't started any IO yet. Hence in * this case all we need to do is iop_committed processing, followed by an @@ -833,7 +829,7 @@ xfs_trans_committed_bulk( struct xfs_ail *ailp, struct xfs_log_vec *log_vector, xfs_lsn_t commit_lsn, - int aborted) + bool aborted) { #define LOG_ITEM_BATCH_SIZE 32 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; @@ -852,7 +848,16 @@ xfs_trans_committed_bulk( if (aborted) set_bit(XFS_LI_ABORTED, &lip->li_flags); - item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); + + if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) { + lip->li_ops->iop_release(lip); + continue; + } + + if (lip->li_ops->iop_committed) + item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); + else + item_lsn = commit_lsn; /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) @@ -864,7 +869,8 @@ xfs_trans_committed_bulk( */ if (aborted) { ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); - lip->li_ops->iop_unpin(lip, 1); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 1); continue; } @@ -882,7 +888,8 @@ xfs_trans_committed_bulk( xfs_trans_ail_update(ailp, lip, item_lsn); else spin_unlock(&ailp->ail_lock); - lip->li_ops->iop_unpin(lip, 0); + if (lip->li_ops->iop_unpin) + lip->li_ops->iop_unpin(lip, 0); continue; } @@ -998,7 +1005,7 @@ out_unreserve: tp->t_ticket = NULL; } current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); + xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); XFS_STATS_INC(mp, xs_trans_empty); @@ -1060,7 +1067,7 @@ xfs_trans_cancel( /* mark this thread as no longer being in a transaction */ current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); + xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c6e1c5704a8c..64d7f171ebd3 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -27,7 +27,7 @@ struct xfs_cud_log_item; struct xfs_bui_log_item; struct xfs_bud_log_item; -typedef struct xfs_log_item { +struct xfs_log_item { struct list_head li_ail; /* AIL pointers */ struct list_head li_trans; /* transaction list */ xfs_lsn_t li_lsn; /* last on-disk lsn */ @@ -48,7 +48,7 @@ typedef struct xfs_log_item { struct xfs_log_vec *li_lv; /* active log vector */ struct xfs_log_vec *li_lv_shadow; /* standby vector */ xfs_lsn_t li_seq; /* CIL commit seq */ -} xfs_log_item_t; +}; /* * li_flags use the (set/test/clear)_bit atomic interfaces because updates can @@ -67,17 +67,24 @@ typedef struct xfs_log_item { { (1 << XFS_LI_DIRTY), "DIRTY" } struct xfs_item_ops { - void (*iop_size)(xfs_log_item_t *, int *, int *); - void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); - void (*iop_pin)(xfs_log_item_t *); - void (*iop_unpin)(xfs_log_item_t *, int remove); + unsigned flags; + void (*iop_size)(struct xfs_log_item *, int *, int *); + void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); + void (*iop_pin)(struct xfs_log_item *); + void (*iop_unpin)(struct xfs_log_item *, int remove); uint (*iop_push)(struct xfs_log_item *, struct list_head *); - void (*iop_unlock)(xfs_log_item_t *); - xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); - void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); + void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn); + void (*iop_release)(struct xfs_log_item *); + xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); + void (*iop_error)(struct xfs_log_item *, xfs_buf_t *); }; +/* + * Release the log item as soon as committed. This is for items just logging + * intents that never need to be written back in place. + */ +#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0) + void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, int type, const struct xfs_item_ops *ops); @@ -203,7 +210,7 @@ xfs_trans_read_buf( flags, bpp, ops); } -struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); +struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *); void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); @@ -223,14 +230,6 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *); bool xfs_trans_buf_is_dirty(struct xfs_buf *bp); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); -struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, - struct xfs_efi_log_item *, - uint); -int xfs_trans_free_extent(struct xfs_trans *, - struct xfs_efd_log_item *, xfs_fsblock_t, - xfs_extlen_t, - const struct xfs_owner_info *, - bool); int xfs_trans_commit(struct xfs_trans *); int xfs_trans_roll(struct xfs_trans **); int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *); @@ -245,37 +244,4 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, extern kmem_zone_t *xfs_trans_zone; -/* rmap updates */ -enum xfs_rmap_intent_type; - -struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp, - struct xfs_rui_log_item *ruip); -int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp, - struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type, - uint64_t owner, int whichfork, xfs_fileoff_t startoff, - xfs_fsblock_t startblock, xfs_filblks_t blockcount, - xfs_exntst_t state, struct xfs_btree_cur **pcur); - -/* refcount updates */ -enum xfs_refcount_intent_type; - -struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp, - struct xfs_cui_log_item *cuip); -int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp, - struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); - -/* mapping updates */ -enum xfs_bmap_intent_type; - -struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp, - struct xfs_bui_log_item *buip); -int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp, - struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type, - struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff, - xfs_fsblock_t startblock, xfs_filblks_t *blockcount, - xfs_exntst_t state); - #endif /* __XFS_TRANS_H__ */ diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index d3a4e89bf4a0..6ccfd75d3c24 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -6,6 +6,7 @@ */ #include "xfs.h" #include "xfs_fs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" @@ -74,29 +75,29 @@ xfs_ail_check( * Return a pointer to the last item in the AIL. If the AIL is empty, then * return NULL. */ -static xfs_log_item_t * +static struct xfs_log_item * xfs_ail_max( struct xfs_ail *ailp) { if (list_empty(&ailp->ail_head)) return NULL; - return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); + return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail); } /* * Return a pointer to the item which follows the given item in the AIL. If * the given item is the last item in the list, then return NULL. */ -static xfs_log_item_t * +static struct xfs_log_item * xfs_ail_next( - struct xfs_ail *ailp, - xfs_log_item_t *lip) + struct xfs_ail *ailp, + struct xfs_log_item *lip) { if (lip->li_ail.next == &ailp->ail_head) return NULL; - return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); + return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail); } /* @@ -109,10 +110,10 @@ xfs_ail_next( */ xfs_lsn_t xfs_ail_min_lsn( - struct xfs_ail *ailp) + struct xfs_ail *ailp) { - xfs_lsn_t lsn = 0; - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + struct xfs_log_item *lip; spin_lock(&ailp->ail_lock); lip = xfs_ail_min(ailp); @@ -128,10 +129,10 @@ xfs_ail_min_lsn( */ static xfs_lsn_t xfs_ail_max_lsn( - struct xfs_ail *ailp) + struct xfs_ail *ailp) { - xfs_lsn_t lsn = 0; - xfs_log_item_t *lip; + xfs_lsn_t lsn = 0; + struct xfs_log_item *lip; spin_lock(&ailp->ail_lock); lip = xfs_ail_max(ailp); @@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear( * ascending traversal. Pass a @lsn of zero to initialise the cursor to the * first item in the AIL. Returns NULL if the list is empty. */ -xfs_log_item_t * +struct xfs_log_item * xfs_trans_ail_cursor_first( struct xfs_ail *ailp, struct xfs_ail_cursor *cur, xfs_lsn_t lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; xfs_trans_ail_cursor_init(ailp, cur); @@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last( struct xfs_ail *ailp, xfs_lsn_t lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) @@ -327,8 +328,8 @@ xfs_ail_splice( */ static void xfs_ail_delete( - struct xfs_ail *ailp, - xfs_log_item_t *lip) + struct xfs_ail *ailp, + struct xfs_log_item *lip) { xfs_ail_check(ailp, lip); list_del(&lip->li_ail); @@ -347,6 +348,14 @@ xfsaild_push_item( if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) return XFS_ITEM_PINNED; + /* + * Consider the item pinned if a push callback is not defined so the + * caller will force the log. This should only happen for intent items + * as they are unpinned once the associated done item is committed to + * the on-disk log. + */ + if (!lip->li_ops->iop_push) + return XFS_ITEM_PINNED; return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); } @@ -356,7 +365,7 @@ xfsaild_push( { xfs_mount_t *mp = ailp->ail_mount; struct xfs_ail_cursor cur; - xfs_log_item_t *lip; + struct xfs_log_item *lip; xfs_lsn_t lsn; xfs_lsn_t target; long tout; @@ -611,10 +620,10 @@ xfsaild( */ void xfs_ail_push( - struct xfs_ail *ailp, - xfs_lsn_t threshold_lsn) + struct xfs_ail *ailp, + xfs_lsn_t threshold_lsn) { - xfs_log_item_t *lip; + struct xfs_log_item *lip; lip = xfs_ail_min(ailp); if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || @@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk( int nr_items, xfs_lsn_t lsn) __releases(ailp->ail_lock) { - xfs_log_item_t *mlip; + struct xfs_log_item *mlip; int mlip_changed = 0; int i; LIST_HEAD(tmp); diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c deleted file mode 100644 index e1c7d55b32c3..000000000000 --- a/fs/xfs/xfs_trans_bmap.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_bmap_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_inode.h" - -/* - * This routine is called to allocate a "bmap update done" - * log item. - */ -struct xfs_bud_log_item * -xfs_trans_get_bud( - struct xfs_trans *tp, - struct xfs_bui_log_item *buip) -{ - struct xfs_bud_log_item *budp; - - budp = xfs_bud_init(tp->t_mountp, buip); - xfs_trans_add_item(tp, &budp->bud_item); - return budp; -} - -/* - * Finish an bmap update and log it to the BUD. Note that the - * transaction is marked dirty regardless of whether the bmap update - * succeeds or fails to support the BUI/BUD lifecycle rules. - */ -int -xfs_trans_log_finish_bmap_update( - struct xfs_trans *tp, - struct xfs_bud_log_item *budp, - enum xfs_bmap_intent_type type, - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) -{ - int error; - - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, - startblock, blockcount, state); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the BUI and frees the BUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); - - return error; -} - -/* Sort bmap intents by inode. */ -static int -xfs_bmap_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_bmap_intent *ba; - struct xfs_bmap_intent *bb; - - ba = container_of(a, struct xfs_bmap_intent, bi_list); - bb = container_of(b, struct xfs_bmap_intent, bi_list); - return ba->bi_owner->i_ino - bb->bi_owner->i_ino; -} - -/* Get an BUI. */ -STATIC void * -xfs_bmap_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_bui_log_item *buip; - - ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); - ASSERT(tp != NULL); - - buip = xfs_bui_init(tp->t_mountp); - ASSERT(buip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &buip->bui_item); - return buip; -} - -/* Set the map extent flags for this mapping. */ -static void -xfs_trans_set_bmap_flags( - struct xfs_map_extent *bmap, - enum xfs_bmap_intent_type type, - int whichfork, - xfs_exntst_t state) -{ - bmap->me_flags = 0; - switch (type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - bmap->me_flags = type; - break; - default: - ASSERT(0); - } - if (state == XFS_EXT_UNWRITTEN) - bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; - if (whichfork == XFS_ATTR_FORK) - bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; -} - -/* Log bmap updates in the intent item. */ -STATIC void -xfs_bmap_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_bui_log_item *buip = intent; - struct xfs_bmap_intent *bmap; - uint next_extent; - struct xfs_map_extent *map; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; - ASSERT(next_extent < buip->bui_format.bui_nextents); - map = &buip->bui_format.bui_extents[next_extent]; - map->me_owner = bmap->bi_owner->i_ino; - map->me_startblock = bmap->bi_bmap.br_startblock; - map->me_startoff = bmap->bi_bmap.br_startoff; - map->me_len = bmap->bi_bmap.br_blockcount; - xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, - bmap->bi_bmap.br_state); -} - -/* Get an BUD so we can process all the deferred rmap updates. */ -STATIC void * -xfs_bmap_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_bud(tp, intent); -} - -/* Process a deferred rmap update. */ -STATIC int -xfs_bmap_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_bmap_intent *bmap; - xfs_filblks_t count; - int error; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - count = bmap->bi_bmap.br_blockcount; - error = xfs_trans_log_finish_bmap_update(tp, done_item, - bmap->bi_type, - bmap->bi_owner, bmap->bi_whichfork, - bmap->bi_bmap.br_startoff, - bmap->bi_bmap.br_startblock, - &count, - bmap->bi_bmap.br_state); - if (!error && count > 0) { - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); - bmap->bi_bmap.br_blockcount = count; - return -EAGAIN; - } - kmem_free(bmap); - return error; -} - -/* Abort all pending BUIs. */ -STATIC void -xfs_bmap_update_abort_intent( - void *intent) -{ - xfs_bui_release(intent); -} - -/* Cancel a deferred rmap update. */ -STATIC void -xfs_bmap_update_cancel_item( - struct list_head *item) -{ - struct xfs_bmap_intent *bmap; - - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_free(bmap); -} - -const struct xfs_defer_op_type xfs_bmap_update_defer_type = { - .max_items = XFS_BUI_MAX_FAST_EXTENTS, - .diff_items = xfs_bmap_update_diff_items, - .create_intent = xfs_bmap_update_create_intent, - .abort_intent = xfs_bmap_update_abort_intent, - .log_item = xfs_bmap_update_log_item, - .create_done = xfs_bmap_update_create_done, - .finish_item = xfs_bmap_update_finish_item, - .cancel_item = xfs_bmap_update_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 7d65ebf1e847..b5b3a78ef31c 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -10,11 +10,9 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" -#include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_trans_priv.h" -#include "xfs_error.h" #include "xfs_trace.h" /* @@ -174,8 +172,7 @@ xfs_trans_get_buf_map( xfs_buf_t * xfs_trans_getsb( xfs_trans_t *tp, - struct xfs_mount *mp, - int flags) + struct xfs_mount *mp) { xfs_buf_t *bp; struct xfs_buf_log_item *bip; @@ -185,7 +182,7 @@ xfs_trans_getsb( * if tp is NULL. */ if (tp == NULL) - return xfs_getsb(mp, flags); + return xfs_getsb(mp); /* * If the superblock buffer already has this transaction @@ -203,7 +200,7 @@ xfs_trans_getsb( return bp; } - bp = xfs_getsb(mp, flags); + bp = xfs_getsb(mp); if (bp == NULL) return NULL; @@ -428,7 +425,7 @@ xfs_trans_brelse( /* * Mark the buffer as not needing to be unlocked when the buf item's - * iop_unlock() routine is called. The buffer must already be locked + * iop_committing() routine is called. The buffer must already be locked * and associated with the given transaction. */ /* ARGSUSED */ diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index cd664a03613f..1027c9ca6eb8 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -11,7 +11,6 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" -#include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_quota.h" @@ -29,7 +28,6 @@ xfs_trans_dqjoin( xfs_trans_t *tp, xfs_dquot_t *dqp) { - ASSERT(dqp->q_transp != tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(dqp->q_logitem.qli_dquot == dqp); @@ -37,15 +35,8 @@ xfs_trans_dqjoin( * Get a log_item_desc to point at the new item. */ xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); - - /* - * Initialize d_transp so we can later determine if this dquot is - * associated with this transaction. - */ - dqp->q_transp = tp; } - /* * This is called to mark the dquot as needing * to be logged when the transaction is committed. The dquot must @@ -61,7 +52,6 @@ xfs_trans_log_dquot( xfs_trans_t *tp, xfs_dquot_t *dqp) { - ASSERT(dqp->q_transp == tp); ASSERT(XFS_DQ_IS_LOCKED(dqp)); tp->t_flags |= XFS_TRANS_DIRTY; @@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas( break; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_transp == tp); /* * adjust the actual number of blocks used diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c deleted file mode 100644 index 8ee7a3f8bb20..000000000000 --- a/fs/xfs/xfs_trans_extfree.c +++ /dev/null @@ -1,286 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_bit.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_extfree_item.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_trace.h" - -/* - * This routine is called to allocate an "extent free done" - * log item that will hold nextents worth of extents. The - * caller must use all nextents extents, because we are not - * flexible about this at all. - */ -struct xfs_efd_log_item * -xfs_trans_get_efd(struct xfs_trans *tp, - struct xfs_efi_log_item *efip, - uint nextents) -{ - struct xfs_efd_log_item *efdp; - - ASSERT(tp != NULL); - ASSERT(nextents > 0); - - efdp = xfs_efd_init(tp->t_mountp, efip, nextents); - ASSERT(efdp != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &efdp->efd_item); - return efdp; -} - -/* - * Free an extent and log it to the EFD. Note that the transaction is marked - * dirty regardless of whether the extent free succeeds or fails to support the - * EFI/EFD lifecycle rules. - */ -int -xfs_trans_free_extent( - struct xfs_trans *tp, - struct xfs_efd_log_item *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len, - const struct xfs_owner_info *oinfo, - bool skip_discard) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent *extp; - uint next_extent; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - start_block); - int error; - - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); - - error = __xfs_free_extent(tp, start_block, ext_len, - oinfo, XFS_AG_RESV_NONE, skip_discard); - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the EFI and frees the EFD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - - next_extent = efdp->efd_next_extent; - ASSERT(next_extent < efdp->efd_format.efd_nextents); - extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = start_block; - extp->ext_len = ext_len; - efdp->efd_next_extent++; - - return error; -} - -/* Sort bmap items by AG. */ -static int -xfs_extent_free_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_extent_free_item *ra; - struct xfs_extent_free_item *rb; - - ra = container_of(a, struct xfs_extent_free_item, xefi_list); - rb = container_of(b, struct xfs_extent_free_item, xefi_list); - return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) - - XFS_FSB_TO_AGNO(mp, rb->xefi_startblock); -} - -/* Get an EFI. */ -STATIC void * -xfs_extent_free_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_efi_log_item *efip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - efip = xfs_efi_init(tp->t_mountp, count); - ASSERT(efip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &efip->efi_item); - return efip; -} - -/* Log a free extent to the intent item. */ -STATIC void -xfs_extent_free_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_efi_log_item *efip = intent; - struct xfs_extent_free_item *free; - uint next_extent; - struct xfs_extent *extp; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; - ASSERT(next_extent < efip->efi_format.efi_nextents); - extp = &efip->efi_format.efi_extents[next_extent]; - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; -} - -/* Get an EFD so we can process all the free extents. */ -STATIC void * -xfs_extent_free_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_efd(tp, intent, count); -} - -/* Process a free extent. */ -STATIC int -xfs_extent_free_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_extent_free_item *free; - int error; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - error = xfs_trans_free_extent(tp, done_item, - free->xefi_startblock, - free->xefi_blockcount, - &free->xefi_oinfo, free->xefi_skip_discard); - kmem_free(free); - return error; -} - -/* Abort all pending EFIs. */ -STATIC void -xfs_extent_free_abort_intent( - void *intent) -{ - xfs_efi_release(intent); -} - -/* Cancel a free extent. */ -STATIC void -xfs_extent_free_cancel_item( - struct list_head *item) -{ - struct xfs_extent_free_item *free; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_free(free); -} - -const struct xfs_defer_op_type xfs_extent_free_defer_type = { - .max_items = XFS_EFI_MAX_FAST_EXTENTS, - .diff_items = xfs_extent_free_diff_items, - .create_intent = xfs_extent_free_create_intent, - .abort_intent = xfs_extent_free_abort_intent, - .log_item = xfs_extent_free_log_item, - .create_done = xfs_extent_free_create_done, - .finish_item = xfs_extent_free_finish_item, - .cancel_item = xfs_extent_free_cancel_item, -}; - -/* - * AGFL blocks are accounted differently in the reserve pools and are not - * inserted into the busy extent list. - */ -STATIC int -xfs_agfl_free_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_mount *mp = tp->t_mountp; - struct xfs_efd_log_item *efdp = done_item; - struct xfs_extent_free_item *free; - struct xfs_extent *extp; - struct xfs_buf *agbp; - int error; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - uint next_extent; - - free = container_of(item, struct xfs_extent_free_item, xefi_list); - ASSERT(free->xefi_blockcount == 1); - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); - - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); - - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); - if (!error) - error = xfs_free_agfl_block(tp, agno, agbno, agbp, - &free->xefi_oinfo); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the EFI and frees the EFD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); - - next_extent = efdp->efd_next_extent; - ASSERT(next_extent < efdp->efd_format.efd_nextents); - extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; - efdp->efd_next_extent++; - - kmem_free(free); - return error; -} - - -/* sub-type with special handling for AGFL deferred frees */ -const struct xfs_defer_op_type xfs_agfl_free_defer_type = { - .max_items = XFS_EFI_MAX_FAST_EXTENTS, - .diff_items = xfs_extent_free_diff_items, - .create_intent = xfs_extent_free_create_intent, - .abort_intent = xfs_extent_free_abort_intent, - .log_item = xfs_extent_free_log_item, - .create_done = xfs_extent_free_create_done, - .finish_item = xfs_agfl_free_finish_item, - .cancel_item = xfs_extent_free_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 542927321a61..93d14e47269d 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -8,13 +8,10 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" #include "xfs_inode_item.h" -#include "xfs_trace.h" #include <linux/iversion.h> diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 091eae9f4e74..2e073c1c4614 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -16,12 +16,10 @@ struct xfs_log_vec; void xfs_trans_init(struct xfs_mount *); void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); -void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, - bool abort); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, - xfs_lsn_t commit_lsn, int aborted); + xfs_lsn_t commit_lsn, bool aborted); /* * AIL traversal cursor. * diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c deleted file mode 100644 index 8d734728dd1b..000000000000 --- a/fs/xfs/xfs_trans_refcount.c +++ /dev/null @@ -1,240 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_refcount_item.h" -#include "xfs_alloc.h" -#include "xfs_refcount.h" - -/* - * This routine is called to allocate a "refcount update done" - * log item. - */ -struct xfs_cud_log_item * -xfs_trans_get_cud( - struct xfs_trans *tp, - struct xfs_cui_log_item *cuip) -{ - struct xfs_cud_log_item *cudp; - - cudp = xfs_cud_init(tp->t_mountp, cuip); - xfs_trans_add_item(tp, &cudp->cud_item); - return cudp; -} - -/* - * Finish an refcount update and log it to the CUD. Note that the - * transaction is marked dirty regardless of whether the refcount - * update succeeds or fails to support the CUI/CUD lifecycle rules. - */ -int -xfs_trans_log_finish_refcount_update( - struct xfs_trans *tp, - struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, - struct xfs_btree_cur **pcur) -{ - int error; - - error = xfs_refcount_finish_one(tp, type, startblock, - blockcount, new_fsb, new_len, pcur); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the CUI and frees the CUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); - - return error; -} - -/* Sort refcount intents by AG. */ -static int -xfs_refcount_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_refcount_intent *ra; - struct xfs_refcount_intent *rb; - - ra = container_of(a, struct xfs_refcount_intent, ri_list); - rb = container_of(b, struct xfs_refcount_intent, ri_list); - return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) - - XFS_FSB_TO_AGNO(mp, rb->ri_startblock); -} - -/* Get an CUI. */ -STATIC void * -xfs_refcount_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_cui_log_item *cuip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - cuip = xfs_cui_init(tp->t_mountp, count); - ASSERT(cuip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &cuip->cui_item); - return cuip; -} - -/* Set the phys extent flags for this reverse mapping. */ -static void -xfs_trans_set_refcount_flags( - struct xfs_phys_extent *refc, - enum xfs_refcount_intent_type type) -{ - refc->pe_flags = 0; - switch (type) { - case XFS_REFCOUNT_INCREASE: - case XFS_REFCOUNT_DECREASE: - case XFS_REFCOUNT_ALLOC_COW: - case XFS_REFCOUNT_FREE_COW: - refc->pe_flags |= type; - break; - default: - ASSERT(0); - } -} - -/* Log refcount updates in the intent item. */ -STATIC void -xfs_refcount_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_cui_log_item *cuip = intent; - struct xfs_refcount_intent *refc; - uint next_extent; - struct xfs_phys_extent *ext; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; - ASSERT(next_extent < cuip->cui_format.cui_nextents); - ext = &cuip->cui_format.cui_extents[next_extent]; - ext->pe_startblock = refc->ri_startblock; - ext->pe_len = refc->ri_blockcount; - xfs_trans_set_refcount_flags(ext, refc->ri_type); -} - -/* Get an CUD so we can process all the deferred refcount updates. */ -STATIC void * -xfs_refcount_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_cud(tp, intent); -} - -/* Process a deferred refcount update. */ -STATIC int -xfs_refcount_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_refcount_intent *refc; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_aglen; - int error; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - error = xfs_trans_log_finish_refcount_update(tp, done_item, - refc->ri_type, - refc->ri_startblock, - refc->ri_blockcount, - &new_fsb, &new_aglen, - (struct xfs_btree_cur **)state); - /* Did we run out of reservation? Requeue what we didn't finish. */ - if (!error && new_aglen > 0) { - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || - refc->ri_type == XFS_REFCOUNT_DECREASE); - refc->ri_startblock = new_fsb; - refc->ri_blockcount = new_aglen; - return -EAGAIN; - } - kmem_free(refc); - return error; -} - -/* Clean up after processing deferred refcounts. */ -STATIC void -xfs_refcount_update_finish_cleanup( - struct xfs_trans *tp, - void *state, - int error) -{ - struct xfs_btree_cur *rcur = state; - - xfs_refcount_finish_one_cleanup(tp, rcur, error); -} - -/* Abort all pending CUIs. */ -STATIC void -xfs_refcount_update_abort_intent( - void *intent) -{ - xfs_cui_release(intent); -} - -/* Cancel a deferred refcount update. */ -STATIC void -xfs_refcount_update_cancel_item( - struct list_head *item) -{ - struct xfs_refcount_intent *refc; - - refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_free(refc); -} - -const struct xfs_defer_op_type xfs_refcount_update_defer_type = { - .max_items = XFS_CUI_MAX_FAST_EXTENTS, - .diff_items = xfs_refcount_update_diff_items, - .create_intent = xfs_refcount_update_create_intent, - .abort_intent = xfs_refcount_update_abort_intent, - .log_item = xfs_refcount_update_log_item, - .create_done = xfs_refcount_update_create_done, - .finish_item = xfs_refcount_update_finish_item, - .finish_cleanup = xfs_refcount_update_finish_cleanup, - .cancel_item = xfs_refcount_update_cancel_item, -}; diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c deleted file mode 100644 index 5c7936b1be13..000000000000 --- a/fs/xfs/xfs_trans_rmap.c +++ /dev/null @@ -1,257 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (C) 2016 Oracle. All Rights Reserved. - * Author: Darrick J. Wong <darrick.wong@oracle.com> - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_shared.h" -#include "xfs_format.h" -#include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" -#include "xfs_defer.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_rmap_item.h" -#include "xfs_alloc.h" -#include "xfs_rmap.h" - -/* Set the map extent flags for this reverse mapping. */ -static void -xfs_trans_set_rmap_flags( - struct xfs_map_extent *rmap, - enum xfs_rmap_intent_type type, - int whichfork, - xfs_exntst_t state) -{ - rmap->me_flags = 0; - if (state == XFS_EXT_UNWRITTEN) - rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; - if (whichfork == XFS_ATTR_FORK) - rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; - switch (type) { - case XFS_RMAP_MAP: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP; - break; - case XFS_RMAP_MAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; - break; - case XFS_RMAP_UNMAP: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; - break; - case XFS_RMAP_UNMAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; - break; - case XFS_RMAP_CONVERT: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; - break; - case XFS_RMAP_CONVERT_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; - break; - case XFS_RMAP_ALLOC: - rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; - break; - case XFS_RMAP_FREE: - rmap->me_flags |= XFS_RMAP_EXTENT_FREE; - break; - default: - ASSERT(0); - } -} - -struct xfs_rud_log_item * -xfs_trans_get_rud( - struct xfs_trans *tp, - struct xfs_rui_log_item *ruip) -{ - struct xfs_rud_log_item *rudp; - - rudp = xfs_rud_init(tp->t_mountp, ruip); - xfs_trans_add_item(tp, &rudp->rud_item); - return rudp; -} - -/* - * Finish an rmap update and log it to the RUD. Note that the transaction is - * marked dirty regardless of whether the rmap update succeeds or fails to - * support the RUI/RUD lifecycle rules. - */ -int -xfs_trans_log_finish_rmap_update( - struct xfs_trans *tp, - struct xfs_rud_log_item *rudp, - enum xfs_rmap_intent_type type, - uint64_t owner, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state, - struct xfs_btree_cur **pcur) -{ - int error; - - error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, - startblock, blockcount, state, pcur); - - /* - * Mark the transaction dirty, even on error. This ensures the - * transaction is aborted, which: - * - * 1.) releases the RUI and frees the RUD - * 2.) shuts down the filesystem - */ - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); - - return error; -} - -/* Sort rmap intents by AG. */ -static int -xfs_rmap_update_diff_items( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_mount *mp = priv; - struct xfs_rmap_intent *ra; - struct xfs_rmap_intent *rb; - - ra = container_of(a, struct xfs_rmap_intent, ri_list); - rb = container_of(b, struct xfs_rmap_intent, ri_list); - return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) - - XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock); -} - -/* Get an RUI. */ -STATIC void * -xfs_rmap_update_create_intent( - struct xfs_trans *tp, - unsigned int count) -{ - struct xfs_rui_log_item *ruip; - - ASSERT(tp != NULL); - ASSERT(count > 0); - - ruip = xfs_rui_init(tp->t_mountp, count); - ASSERT(ruip != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &ruip->rui_item); - return ruip; -} - -/* Log rmap updates in the intent item. */ -STATIC void -xfs_rmap_update_log_item( - struct xfs_trans *tp, - void *intent, - struct list_head *item) -{ - struct xfs_rui_log_item *ruip = intent; - struct xfs_rmap_intent *rmap; - uint next_extent; - struct xfs_map_extent *map; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - - tp->t_flags |= XFS_TRANS_DIRTY; - set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); - - /* - * atomic_inc_return gives us the value after the increment; - * we want to use it as an array index so we need to subtract 1 from - * it. - */ - next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; - ASSERT(next_extent < ruip->rui_format.rui_nextents); - map = &ruip->rui_format.rui_extents[next_extent]; - map->me_owner = rmap->ri_owner; - map->me_startblock = rmap->ri_bmap.br_startblock; - map->me_startoff = rmap->ri_bmap.br_startoff; - map->me_len = rmap->ri_bmap.br_blockcount; - xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, - rmap->ri_bmap.br_state); -} - -/* Get an RUD so we can process all the deferred rmap updates. */ -STATIC void * -xfs_rmap_update_create_done( - struct xfs_trans *tp, - void *intent, - unsigned int count) -{ - return xfs_trans_get_rud(tp, intent); -} - -/* Process a deferred rmap update. */ -STATIC int -xfs_rmap_update_finish_item( - struct xfs_trans *tp, - struct list_head *item, - void *done_item, - void **state) -{ - struct xfs_rmap_intent *rmap; - int error; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - error = xfs_trans_log_finish_rmap_update(tp, done_item, - rmap->ri_type, - rmap->ri_owner, rmap->ri_whichfork, - rmap->ri_bmap.br_startoff, - rmap->ri_bmap.br_startblock, - rmap->ri_bmap.br_blockcount, - rmap->ri_bmap.br_state, - (struct xfs_btree_cur **)state); - kmem_free(rmap); - return error; -} - -/* Clean up after processing deferred rmaps. */ -STATIC void -xfs_rmap_update_finish_cleanup( - struct xfs_trans *tp, - void *state, - int error) -{ - struct xfs_btree_cur *rcur = state; - - xfs_rmap_finish_one_cleanup(tp, rcur, error); -} - -/* Abort all pending RUIs. */ -STATIC void -xfs_rmap_update_abort_intent( - void *intent) -{ - xfs_rui_release(intent); -} - -/* Cancel a deferred rmap update. */ -STATIC void -xfs_rmap_update_cancel_item( - struct list_head *item) -{ - struct xfs_rmap_intent *rmap; - - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_free(rmap); -} - -const struct xfs_defer_op_type xfs_rmap_update_defer_type = { - .max_items = XFS_RUI_MAX_FAST_EXTENTS, - .diff_items = xfs_rmap_update_diff_items, - .create_intent = xfs_rmap_update_create_intent, - .abort_intent = xfs_rmap_update_abort_intent, - .log_item = xfs_rmap_update_log_item, - .create_done = xfs_rmap_update_create_done, - .finish_item = xfs_rmap_update_finish_item, - .finish_cleanup = xfs_rmap_update_finish_cleanup, - .cancel_item = xfs_rmap_update_cancel_item, -}; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 9a63016009a1..3123b5aaad2a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -5,15 +5,12 @@ */ #include "xfs.h" +#include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" -#include "xfs_trans_resv.h" -#include "xfs_mount.h" #include "xfs_da_format.h" #include "xfs_inode.h" #include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_acl.h" #include <linux/posix_acl_xattr.h> #include <linux/xattr.h> |