diff options
Diffstat (limited to 'libbcachefs/fs.c')
-rw-r--r-- | libbcachefs/fs.c | 217 |
1 files changed, 130 insertions, 87 deletions
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c index bdd9183b..c50457ba 100644 --- a/libbcachefs/fs.c +++ b/libbcachefs/fs.c @@ -152,42 +152,80 @@ int bch2_fs_quota_transfer(struct bch_fs *c, return ret; } -static int bch2_iget5_test(struct inode *vinode, void *p) +static bool subvol_inum_eq(subvol_inum a, subvol_inum b) { - struct bch_inode_info *inode = to_bch_ei(vinode); - subvol_inum *inum = p; + return a.subvol == b.subvol && a.inum == b.inum; +} + +static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct bch_inode_info *inode = obj; + const subvol_inum *v = arg->key; - return inode->ei_subvol == inum->subvol && - inode->ei_inode.bi_inum == inum->inum; + return !subvol_inum_eq(inode->ei_inum, *v); } -static int bch2_iget5_set(struct inode *vinode, void *p) +static const struct rhashtable_params bch2_vfs_inodes_params = { + .head_offset = offsetof(struct bch_inode_info, hash), + .key_offset = offsetof(struct bch_inode_info, ei_inum), + .key_len = sizeof(subvol_inum), + .obj_cmpfn = bch2_vfs_inode_cmp_fn, + .automatic_shrinking = true, +}; + +static void __wait_on_freeing_inode(struct inode *inode) { - struct bch_inode_info *inode = to_bch_ei(vinode); - subvol_inum *inum = p; + wait_queue_head_t *wq; + DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + wq = bit_waitqueue(&inode->i_state, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->i_lock); + schedule(); + finish_wait(wq, &wait.wq_entry); +} - inode->v.i_ino = inum->inum; - inode->ei_subvol = inum->subvol; - inode->ei_inode.bi_inum = inum->inum; - return 0; +static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) +{ + struct bch_inode_info *inode; +repeat: + inode = rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, + bch2_vfs_inodes_params); + if (inode) { + spin_lock(&inode->v.i_lock); + if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { + __wait_on_freeing_inode(&inode->v); + goto repeat; + } + __iget(&inode->v); + spin_unlock(&inode->v.i_lock); + } + + return inode; } -static unsigned bch2_inode_hash(subvol_inum inum) +static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode) { - return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL); + if (test_bit(EI_INODE_HASHED, &inode->ei_flags)) { + int ret = rhashtable_remove_fast(&c->vfs_inodes_table, + &inode->hash, bch2_vfs_inodes_params); + BUG_ON(ret); + clear_bit(EI_INODE_HASHED, &inode->ei_flags); + inode->v.i_hash.pprev = NULL; + } } -static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode) +static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode) { - subvol_inum inum = inode_inum(inode); - struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v, - bch2_inode_hash(inum), - bch2_iget5_test, - bch2_iget5_set, - &inum)); - BUG_ON(!old); + struct bch_inode_info *old = inode; +retry: + if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table, + &inode->hash, + bch2_vfs_inodes_params))) { + old = bch2_inode_hash_find(c, inode->ei_inum); + if (!old) + goto retry; - if (unlikely(old != inode)) { /* * bcachefs doesn't use I_NEW; we have no use for it since we * only insert fully created inodes in the inode hash table. But @@ -203,16 +241,14 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino discard_new_inode(&inode->v); inode = old; } else { + set_bit(EI_INODE_HASHED, &inode->ei_flags); + inode_fake_hash(&inode->v); + + inode_sb_list_add(&inode->v); + mutex_lock(&c->vfs_inodes_lock); list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); mutex_unlock(&c->vfs_inodes_lock); - /* - * Again, I_NEW makes no sense for bcachefs. This is only needed - * for clearing I_NEW, but since the inode was already fully - * created and initialized we didn't actually want - * inode_insert5() to set it for us. - */ - unlock_new_inode(&inode->v); } return inode; @@ -245,7 +281,6 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c) inode->ei_flags = 0; mutex_init(&inode->ei_quota_lock); memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush)); - inode->v.i_state = 0; if (unlikely(inode_init_always(c->vfs_sb, &inode->v))) { kmem_cache_free(bch2_inode_cache, inode); @@ -279,11 +314,7 @@ static struct bch_inode_info *bch2_new_inode(struct btree_trans *trans) struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) { - struct bch_inode_info *inode = - to_bch_ei(ilookup5_nowait(c->vfs_sb, - bch2_inode_hash(inum), - bch2_iget5_test, - &inum)); + struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); if (inode) return &inode->v; @@ -297,7 +328,7 @@ struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans)); if (!ret) { bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); - inode = bch2_inode_insert(c, inode); + inode = bch2_inode_hash_insert(c, inode); } bch2_trans_put(trans); @@ -345,7 +376,7 @@ __bch2_create(struct mnt_idmap *idmap, retry: bch2_trans_begin(trans); - ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?: + ret = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?: bch2_create_trans(trans, inode_inum(dir), &dir_u, &inode_u, !(flags & BCH_CREATE_TMPFILE) @@ -359,7 +390,7 @@ retry: if (unlikely(ret)) goto err_before_quota; - inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol; + inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol; inum.inum = inode_u.bi_inum; ret = bch2_subvolume_get(trans, inum.subvol, true, @@ -390,7 +421,7 @@ err_before_quota: * bch2_trans_exit() and dropping locks, else we could race with another * thread pulling the inode in and modifying it: */ - inode = bch2_inode_insert(c, inode); + inode = bch2_inode_hash_insert(c, inode); bch2_trans_put(trans); err: posix_acl_release(default_acl); @@ -430,11 +461,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, if (ret) goto err; - struct bch_inode_info *inode = - to_bch_ei(ilookup5_nowait(c->vfs_sb, - bch2_inode_hash(inum), - bch2_iget5_test, - &inum)); + struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); if (inode) goto out; @@ -464,7 +491,7 @@ static struct bch_inode_info *bch2_lookup_trans(struct btree_trans *trans, } bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); - inode = bch2_inode_insert(c, inode); + inode = bch2_inode_hash_insert(c, inode); out: bch2_trans_iter_exit(trans, &dirent_iter); printbuf_exit(&buf); @@ -551,8 +578,8 @@ static int bch2_link(struct dentry *old_dentry, struct inode *vdir, lockdep_assert_held(&inode->v.i_rwsem); - ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: - bch2_subvol_is_ro(c, inode->ei_subvol) ?: + ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?: + bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: __bch2_link(c, inode, dir, dentry); if (unlikely(ret)) return bch2_err_class(ret); @@ -608,7 +635,7 @@ static int bch2_unlink(struct inode *vdir, struct dentry *dentry) struct bch_inode_info *dir= to_bch_ei(vdir); struct bch_fs *c = dir->v.i_sb->s_fs_info; - int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?: + int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?: __bch2_unlink(vdir, dentry, false); return bch2_err_class(ret); } @@ -691,8 +718,8 @@ static int bch2_rename2(struct mnt_idmap *idmap, trans = bch2_trans_get(c); - ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?: - bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol); + ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?: + bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol); if (ret) goto err; @@ -893,7 +920,7 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->blksize = block_bytes(c); stat->blocks = inode->v.i_blocks; - stat->subvol = inode->ei_subvol; + stat->subvol = inode->ei_inum.subvol; stat->result_mask |= STATX_SUBVOL; if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) { @@ -935,7 +962,7 @@ static int bch2_setattr(struct mnt_idmap *idmap, lockdep_assert_held(&inode->v.i_rwsem); - ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?: + ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: setattr_prepare(idmap, dentry, iattr); if (ret) return ret; @@ -1028,7 +1055,6 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, struct bkey_buf cur, prev; unsigned offset_into_extent, sectors; bool have_extent = false; - u32 snapshot; int ret = 0; ret = fiemap_prep(&ei->v, info, start, &len, FIEMAP_FLAG_SYNC); @@ -1044,21 +1070,30 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, bch2_bkey_buf_init(&cur); bch2_bkey_buf_init(&prev); trans = bch2_trans_get(c); -retry: - bch2_trans_begin(trans); - - ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot); - if (ret) - goto err; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - SPOS(ei->v.i_ino, start, snapshot), 0); + POS(ei->v.i_ino, start), 0); - while (!(ret = btree_trans_too_many_iters(trans)) && - (k = bch2_btree_iter_peek_upto(&iter, end)).k && - !(ret = bkey_err(k))) { + while (true) { enum btree_id data_btree = BTREE_ID_extents; + bch2_trans_begin(trans); + + u32 snapshot; + ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot); + if (ret) + goto err; + + bch2_btree_iter_set_snapshot(&iter, snapshot); + + k = bch2_btree_iter_peek_upto(&iter, end); + ret = bkey_err(k); + if (ret) + goto err; + + if (!k.k) + break; + if (!bkey_extent_is_data(k.k) && k.k->type != KEY_TYPE_reservation) { bch2_btree_iter_advance(&iter); @@ -1102,16 +1137,12 @@ retry: bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, iter.pos.offset + sectors)); - - ret = bch2_trans_relock(trans); - if (ret) +err: + if (ret && + !bch2_err_matches(ret, BCH_ERR_transaction_restart)) break; } - start = iter.pos.offset; bch2_trans_iter_exit(trans, &iter); -err: - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; if (!ret && have_extent) { bch2_trans_unlock(trans); @@ -1167,7 +1198,7 @@ static int bch2_open(struct inode *vinode, struct file *file) struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; - int ret = bch2_subvol_is_ro(c, inode->ei_subvol); + int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol); if (ret) return ret; } @@ -1201,7 +1232,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1221,7 +1252,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1243,7 +1274,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1253,7 +1284,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1299,8 +1330,8 @@ static int bcachefs_fid_valid(int fh_len, int fh_type) static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode) { return (struct bcachefs_fid) { - .inum = inode->ei_inode.bi_inum, - .subvol = inode->ei_subvol, + .inum = inode->ei_inum.inum, + .subvol = inode->ei_inum.subvol, .gen = inode->ei_inode.bi_generation, }; } @@ -1385,7 +1416,7 @@ static struct dentry *bch2_get_parent(struct dentry *child) struct bch_fs *c = inode->v.i_sb->s_fs_info; subvol_inum parent_inum = { .subvol = inode->ei_inode.bi_parent_subvol ?: - inode->ei_subvol, + inode->ei_inum.subvol, .inum = inode->ei_inode.bi_dir, }; @@ -1421,7 +1452,7 @@ static int bch2_get_name(struct dentry *parent, char *name, struct dentry *child retry: bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot); + ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot); if (ret) goto err; @@ -1452,8 +1483,7 @@ retry: if (ret) goto err; - if (target.subvol == inode->ei_subvol && - target.inum == inode->ei_inode.bi_inum) + if (subvol_inum_eq(target, inode->ei_inum)) goto found; } else { /* @@ -1474,8 +1504,7 @@ retry: if (ret) continue; - if (target.subvol == inode->ei_subvol && - target.inum == inode->ei_inode.bi_inum) + if (subvol_inum_eq(target, inode->ei_inum)) goto found; } } @@ -1512,7 +1541,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, struct bch_inode_unpacked *bi, struct bch_subvolume *subvol) { - bch2_iget5_set(&inode->v, &inum); + inode->v.i_ino = inum.inum; + inode->ei_inum = inum; + inode->ei_inode.bi_inum = inum.inum; bch2_inode_update_after_write(trans, inode, bi, ~0); inode->v.i_blocks = bi->bi_sectors; @@ -1524,7 +1555,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, inode->ei_flags = 0; inode->ei_quota_reserved = 0; inode->ei_qid = bch_qid(bi); - inode->ei_subvol = inum.subvol; if (BCH_SUBVOLUME_SNAP(subvol)) set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags); @@ -1592,6 +1622,8 @@ static void bch2_evict_inode(struct inode *vinode) struct bch_fs *c = vinode->i_sb->s_fs_info; struct bch_inode_info *inode = to_bch_ei(vinode); + bch2_inode_hash_remove(c, inode); + truncate_inode_pages_final(&inode->v.i_data); clear_inode(&inode->v); @@ -1633,7 +1665,7 @@ again: mutex_lock(&c->vfs_inodes_lock); list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) { - if (!snapshot_list_has_id(s, inode->ei_subvol)) + if (!snapshot_list_has_id(s, inode->ei_inum.subvol)) continue; if (!(inode->v.i_state & I_DONTCACHE) && @@ -2121,6 +2153,17 @@ static int bch2_init_fs_context(struct fs_context *fc) return 0; } +void bch2_fs_vfs_exit(struct bch_fs *c) +{ + if (c->vfs_inodes_table.tbl) + rhashtable_destroy(&c->vfs_inodes_table); +} + +int bch2_fs_vfs_init(struct bch_fs *c) +{ + return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params); +} + static struct file_system_type bcache_fs_type = { .owner = THIS_MODULE, .name = "bcachefs", |