summaryrefslogtreecommitdiff
path: root/libbcachefs/fs-common.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/fs-common.c')
-rw-r--r--libbcachefs/fs-common.c631
1 files changed, 631 insertions, 0 deletions
diff --git a/libbcachefs/fs-common.c b/libbcachefs/fs-common.c
new file mode 100644
index 00000000..dcaa47f6
--- /dev/null
+++ b/libbcachefs/fs-common.c
@@ -0,0 +1,631 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "acl.h"
+#include "btree_update.h"
+#include "dirent.h"
+#include "fs-common.h"
+#include "inode.h"
+#include "subvolume.h"
+#include "xattr.h"
+
+#include <linux/posix_acl.h>
+
+static inline int is_subdir_for_nlink(struct bch_inode_unpacked *inode)
+{
+ return S_ISDIR(inode->bi_mode) && !inode->bi_subvol;
+}
+
+int bch2_create_trans(struct btree_trans *trans,
+ subvol_inum dir,
+ struct bch_inode_unpacked *dir_u,
+ struct bch_inode_unpacked *new_inode,
+ const struct qstr *name,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev,
+ struct posix_acl *default_acl,
+ struct posix_acl *acl,
+ subvol_inum snapshot_src,
+ unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ subvol_inum new_inum = dir;
+ u64 now = bch2_current_time(c);
+ u64 cpu = raw_smp_processor_id();
+ u64 dir_target;
+ u32 snapshot;
+ unsigned dir_type = mode_to_type(mode);
+ int ret;
+
+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir,
+ BTREE_ITER_intent|BTREE_ITER_with_updates);
+ if (ret)
+ goto err;
+
+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
+ /* Normal create path - allocate a new inode: */
+ bch2_inode_init_late(new_inode, now, uid, gid, mode, rdev, dir_u);
+
+ if (flags & BCH_CREATE_TMPFILE)
+ new_inode->bi_flags |= BCH_INODE_unlinked;
+
+ ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
+ if (ret)
+ goto err;
+
+ snapshot_src = (subvol_inum) { 0 };
+ } else {
+ /*
+ * Creating a snapshot - we're not allocating a new inode, but
+ * we do have to lookup the root inode of the subvolume we're
+ * snapshotting and update it (in the new snapshot):
+ */
+
+ if (!snapshot_src.inum) {
+ /* Inode wasn't specified, just snapshot: */
+ struct bch_subvolume s;
+
+ ret = bch2_subvolume_get(trans, snapshot_src.subvol, true,
+ BTREE_ITER_cached, &s);
+ if (ret)
+ goto err;
+
+ snapshot_src.inum = le64_to_cpu(s.inode);
+ }
+
+ ret = bch2_inode_peek(trans, &inode_iter, new_inode, snapshot_src,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ if (new_inode->bi_subvol != snapshot_src.subvol) {
+ /* Not a subvolume root: */
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * If we're not root, we have to own the subvolume being
+ * snapshotted:
+ */
+ if (uid && new_inode->bi_uid != uid) {
+ ret = -EPERM;
+ goto err;
+ }
+
+ flags |= BCH_CREATE_SUBVOL;
+ }
+
+ new_inum.inum = new_inode->bi_inum;
+ dir_target = new_inode->bi_inum;
+
+ if (flags & BCH_CREATE_SUBVOL) {
+ u32 new_subvol, dir_snapshot;
+
+ ret = bch2_subvolume_create(trans, new_inode->bi_inum,
+ dir.subvol,
+ snapshot_src.subvol,
+ &new_subvol, &snapshot,
+ (flags & BCH_CREATE_SNAPSHOT_RO) != 0);
+ if (ret)
+ goto err;
+
+ new_inode->bi_parent_subvol = dir.subvol;
+ new_inode->bi_subvol = new_subvol;
+ new_inum.subvol = new_subvol;
+ dir_target = new_subvol;
+ dir_type = DT_SUBVOL;
+
+ ret = bch2_subvolume_get_snapshot(trans, dir.subvol, &dir_snapshot);
+ if (ret)
+ goto err;
+
+ bch2_btree_iter_set_snapshot(&dir_iter, dir_snapshot);
+ ret = bch2_btree_iter_traverse(&dir_iter);
+ if (ret)
+ goto err;
+ }
+
+ if (!(flags & BCH_CREATE_SNAPSHOT)) {
+ if (default_acl) {
+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
+ default_acl, ACL_TYPE_DEFAULT);
+ if (ret)
+ goto err;
+ }
+
+ if (acl) {
+ ret = bch2_set_acl_trans(trans, new_inum, new_inode,
+ acl, ACL_TYPE_ACCESS);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (!(flags & BCH_CREATE_TMPFILE)) {
+ struct bch_hash_info dir_hash = bch2_hash_info_init(c, dir_u);
+ u64 dir_offset;
+
+ if (is_subdir_for_nlink(new_inode))
+ dir_u->bi_nlink++;
+ dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+ ret = bch2_inode_write(trans, &dir_iter, dir_u);
+ if (ret)
+ goto err;
+
+ ret = bch2_dirent_create(trans, dir, &dir_hash,
+ dir_type,
+ name,
+ dir_target,
+ &dir_offset,
+ STR_HASH_must_create|BTREE_ITER_with_updates);
+ if (ret)
+ goto err;
+
+ new_inode->bi_dir = dir_u->bi_inum;
+ new_inode->bi_dir_offset = dir_offset;
+ }
+
+ inode_iter.flags &= ~BTREE_ITER_all_snapshots;
+ bch2_btree_iter_set_snapshot(&inode_iter, snapshot);
+
+ ret = bch2_btree_iter_traverse(&inode_iter) ?:
+ bch2_inode_write(trans, &inode_iter, new_inode);
+err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &dir_iter);
+ return ret;
+}
+
+int bch2_link_trans(struct btree_trans *trans,
+ subvol_inum dir, struct bch_inode_unpacked *dir_u,
+ subvol_inum inum, struct bch_inode_unpacked *inode_u,
+ const struct qstr *name)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ struct bch_hash_info dir_hash;
+ u64 now = bch2_current_time(c);
+ u64 dir_offset = 0;
+ int ret;
+
+ if (dir.subvol != inum.subvol)
+ return -EXDEV;
+
+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum, BTREE_ITER_intent);
+ if (ret)
+ return ret;
+
+ inode_u->bi_ctime = now;
+ ret = bch2_inode_nlink_inc(inode_u);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ if (bch2_reinherit_attrs(inode_u, dir_u)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ dir_u->bi_mtime = dir_u->bi_ctime = now;
+
+ dir_hash = bch2_hash_info_init(c, dir_u);
+
+ ret = bch2_dirent_create(trans, dir, &dir_hash,
+ mode_to_type(inode_u->bi_mode),
+ name, inum.inum, &dir_offset,
+ STR_HASH_must_create);
+ if (ret)
+ goto err;
+
+ inode_u->bi_dir = dir.inum;
+ inode_u->bi_dir_offset = dir_offset;
+
+ ret = bch2_inode_write(trans, &dir_iter, dir_u) ?:
+ bch2_inode_write(trans, &inode_iter, inode_u);
+err:
+ bch2_trans_iter_exit(trans, &dir_iter);
+ bch2_trans_iter_exit(trans, &inode_iter);
+ return ret;
+}
+
+int bch2_unlink_trans(struct btree_trans *trans,
+ subvol_inum dir,
+ struct bch_inode_unpacked *dir_u,
+ struct bch_inode_unpacked *inode_u,
+ const struct qstr *name,
+ bool deleting_subvol)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter dir_iter = { NULL };
+ struct btree_iter dirent_iter = { NULL };
+ struct btree_iter inode_iter = { NULL };
+ struct bch_hash_info dir_hash;
+ subvol_inum inum;
+ u64 now = bch2_current_time(c);
+ struct bkey_s_c k;
+ int ret;
+
+ ret = bch2_inode_peek(trans, &dir_iter, dir_u, dir, BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ dir_hash = bch2_hash_info_init(c, dir_u);
+
+ ret = bch2_dirent_lookup_trans(trans, &dirent_iter, dir, &dir_hash,
+ name, &inum, BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &inode_iter, inode_u, inum,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ if (!deleting_subvol && S_ISDIR(inode_u->bi_mode)) {
+ ret = bch2_empty_dir_trans(trans, inum);
+ if (ret)
+ goto err;
+ }
+
+ if (deleting_subvol && !inode_u->bi_subvol) {
+ ret = -BCH_ERR_ENOENT_not_subvol;
+ goto err;
+ }
+
+ if (inode_u->bi_subvol) {
+ /* Recursive subvolume destroy not allowed (yet?) */
+ ret = bch2_subvol_has_children(trans, inode_u->bi_subvol);
+ if (ret)
+ goto err;
+ }
+
+ if (deleting_subvol || inode_u->bi_subvol) {
+ ret = bch2_subvolume_unlink(trans, inode_u->bi_subvol);
+ if (ret)
+ goto err;
+
+ k = bch2_btree_iter_peek_slot(&dirent_iter);
+ ret = bkey_err(k);
+ if (ret)
+ goto err;
+
+ /*
+ * If we're deleting a subvolume, we need to really delete the
+ * dirent, not just emit a whiteout in the current snapshot:
+ */
+ bch2_btree_iter_set_snapshot(&dirent_iter, k.k->p.snapshot);
+ ret = bch2_btree_iter_traverse(&dirent_iter);
+ if (ret)
+ goto err;
+ } else {
+ bch2_inode_nlink_dec(trans, inode_u);
+ }
+
+ if (inode_u->bi_dir == dirent_iter.pos.inode &&
+ inode_u->bi_dir_offset == dirent_iter.pos.offset) {
+ inode_u->bi_dir = 0;
+ inode_u->bi_dir_offset = 0;
+ }
+
+ dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
+ dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
+
+ ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+ &dir_hash, &dirent_iter,
+ BTREE_UPDATE_internal_snapshot_node) ?:
+ bch2_inode_write(trans, &dir_iter, dir_u) ?:
+ bch2_inode_write(trans, &inode_iter, inode_u);
+err:
+ bch2_trans_iter_exit(trans, &inode_iter);
+ bch2_trans_iter_exit(trans, &dirent_iter);
+ bch2_trans_iter_exit(trans, &dir_iter);
+ return ret;
+}
+
+bool bch2_reinherit_attrs(struct bch_inode_unpacked *dst_u,
+ struct bch_inode_unpacked *src_u)
+{
+ u64 src, dst;
+ unsigned id;
+ bool ret = false;
+
+ for (id = 0; id < Inode_opt_nr; id++) {
+ /* Skip attributes that were explicitly set on this inode */
+ if (dst_u->bi_fields_set & (1 << id))
+ continue;
+
+ src = bch2_inode_opt_get(src_u, id);
+ dst = bch2_inode_opt_get(dst_u, id);
+
+ if (src == dst)
+ continue;
+
+ bch2_inode_opt_set(dst_u, id, src);
+ ret = true;
+ }
+
+ return ret;
+}
+
+static int subvol_update_parent(struct btree_trans *trans, u32 subvol, u32 new_parent)
+{
+ struct btree_iter iter;
+ struct bkey_i_subvolume *s =
+ bch2_bkey_get_mut_typed(trans, &iter,
+ BTREE_ID_subvolumes, POS(0, subvol),
+ BTREE_ITER_cached, subvolume);
+ int ret = PTR_ERR_OR_ZERO(s);
+ if (ret)
+ return ret;
+
+ s->v.fs_path_parent = cpu_to_le32(new_parent);
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+}
+
+int bch2_rename_trans(struct btree_trans *trans,
+ subvol_inum src_dir, struct bch_inode_unpacked *src_dir_u,
+ subvol_inum dst_dir, struct bch_inode_unpacked *dst_dir_u,
+ struct bch_inode_unpacked *src_inode_u,
+ struct bch_inode_unpacked *dst_inode_u,
+ const struct qstr *src_name,
+ const struct qstr *dst_name,
+ enum bch_rename_mode mode)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter src_dir_iter = { NULL };
+ struct btree_iter dst_dir_iter = { NULL };
+ struct btree_iter src_inode_iter = { NULL };
+ struct btree_iter dst_inode_iter = { NULL };
+ struct bch_hash_info src_hash, dst_hash;
+ subvol_inum src_inum, dst_inum;
+ u64 src_offset, dst_offset;
+ u64 now = bch2_current_time(c);
+ int ret;
+
+ ret = bch2_inode_peek(trans, &src_dir_iter, src_dir_u, src_dir,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ src_hash = bch2_hash_info_init(c, src_dir_u);
+
+ if (dst_dir.inum != src_dir.inum ||
+ dst_dir.subvol != src_dir.subvol) {
+ ret = bch2_inode_peek(trans, &dst_dir_iter, dst_dir_u, dst_dir,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ dst_hash = bch2_hash_info_init(c, dst_dir_u);
+ } else {
+ dst_dir_u = src_dir_u;
+ dst_hash = src_hash;
+ }
+
+ ret = bch2_dirent_rename(trans,
+ src_dir, &src_hash,
+ dst_dir, &dst_hash,
+ src_name, &src_inum, &src_offset,
+ dst_name, &dst_inum, &dst_offset,
+ mode);
+ if (ret)
+ goto err;
+
+ ret = bch2_inode_peek(trans, &src_inode_iter, src_inode_u, src_inum,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+
+ if (dst_inum.inum) {
+ ret = bch2_inode_peek(trans, &dst_inode_iter, dst_inode_u, dst_inum,
+ BTREE_ITER_intent);
+ if (ret)
+ goto err;
+ }
+
+ if (src_inode_u->bi_subvol &&
+ dst_dir.subvol != src_inode_u->bi_parent_subvol) {
+ ret = subvol_update_parent(trans, src_inode_u->bi_subvol, dst_dir.subvol);
+ if (ret)
+ goto err;
+ }
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ dst_inode_u->bi_subvol &&
+ src_dir.subvol != dst_inode_u->bi_parent_subvol) {
+ ret = subvol_update_parent(trans, dst_inode_u->bi_subvol, src_dir.subvol);
+ if (ret)
+ goto err;
+ }
+
+ /* Can't move across subvolumes, unless it's a subvolume root: */
+ if (src_dir.subvol != dst_dir.subvol &&
+ (!src_inode_u->bi_subvol ||
+ (dst_inum.inum && !dst_inode_u->bi_subvol))) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ if (src_inode_u->bi_parent_subvol)
+ src_inode_u->bi_parent_subvol = dst_dir.subvol;
+
+ if ((mode == BCH_RENAME_EXCHANGE) &&
+ dst_inode_u->bi_parent_subvol)
+ dst_inode_u->bi_parent_subvol = src_dir.subvol;
+
+ src_inode_u->bi_dir = dst_dir_u->bi_inum;
+ src_inode_u->bi_dir_offset = dst_offset;
+
+ if (mode == BCH_RENAME_EXCHANGE) {
+ dst_inode_u->bi_dir = src_dir_u->bi_inum;
+ dst_inode_u->bi_dir_offset = src_offset;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE &&
+ dst_inode_u->bi_dir == dst_dir_u->bi_inum &&
+ dst_inode_u->bi_dir_offset == src_offset) {
+ dst_inode_u->bi_dir = 0;
+ dst_inode_u->bi_dir_offset = 0;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE) {
+ if (S_ISDIR(src_inode_u->bi_mode) !=
+ S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = -ENOTDIR;
+ goto err;
+ }
+
+ if (S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = bch2_empty_dir_trans(trans, dst_inum);
+ if (ret)
+ goto err;
+ }
+ }
+
+ if (bch2_reinherit_attrs(src_inode_u, dst_dir_u) &&
+ S_ISDIR(src_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ if (mode == BCH_RENAME_EXCHANGE &&
+ bch2_reinherit_attrs(dst_inode_u, src_dir_u) &&
+ S_ISDIR(dst_inode_u->bi_mode)) {
+ ret = -EXDEV;
+ goto err;
+ }
+
+ if (is_subdir_for_nlink(src_inode_u)) {
+ src_dir_u->bi_nlink--;
+ dst_dir_u->bi_nlink++;
+ }
+
+ if (dst_inum.inum && is_subdir_for_nlink(dst_inode_u)) {
+ dst_dir_u->bi_nlink--;
+ src_dir_u->bi_nlink += mode == BCH_RENAME_EXCHANGE;
+ }
+
+ if (mode == BCH_RENAME_OVERWRITE)
+ bch2_inode_nlink_dec(trans, dst_inode_u);
+
+ src_dir_u->bi_mtime = now;
+ src_dir_u->bi_ctime = now;
+
+ if (src_dir.inum != dst_dir.inum) {
+ dst_dir_u->bi_mtime = now;
+ dst_dir_u->bi_ctime = now;
+ }
+
+ src_inode_u->bi_ctime = now;
+
+ if (dst_inum.inum)
+ dst_inode_u->bi_ctime = now;
+
+ ret = bch2_inode_write(trans, &src_dir_iter, src_dir_u) ?:
+ (src_dir.inum != dst_dir.inum
+ ? bch2_inode_write(trans, &dst_dir_iter, dst_dir_u)
+ : 0) ?:
+ bch2_inode_write(trans, &src_inode_iter, src_inode_u) ?:
+ (dst_inum.inum
+ ? bch2_inode_write(trans, &dst_inode_iter, dst_inode_u)
+ : 0);
+err:
+ bch2_trans_iter_exit(trans, &dst_inode_iter);
+ bch2_trans_iter_exit(trans, &src_inode_iter);
+ bch2_trans_iter_exit(trans, &dst_dir_iter);
+ bch2_trans_iter_exit(trans, &src_dir_iter);
+ return ret;
+}
+
+static inline void prt_bytes_reversed(struct printbuf *out, const void *b, unsigned n)
+{
+ bch2_printbuf_make_room(out, n);
+
+ unsigned can_print = min(n, printbuf_remaining(out));
+
+ b += n;
+
+ for (unsigned i = 0; i < can_print; i++)
+ out->buf[out->pos++] = *((char *) --b);
+
+ printbuf_nul_terminate(out);
+}
+
+static inline void reverse_bytes(void *b, size_t n)
+{
+ char *e = b + n, *s = b;
+
+ while (s < e) {
+ --e;
+ swap(*s, *e);
+ s++;
+ }
+}
+
+/* XXX: we don't yet attempt to print paths when we don't know the subvol */
+int bch2_inum_to_path(struct btree_trans *trans, subvol_inum inum, struct printbuf *path)
+{
+ unsigned orig_pos = path->pos;
+ int ret = 0;
+
+ while (!(inum.subvol == BCACHEFS_ROOT_SUBVOL &&
+ inum.inum == BCACHEFS_ROOT_INO)) {
+ struct bch_inode_unpacked inode;
+ ret = bch2_inode_find_by_inum_trans(trans, inum, &inode);
+ if (ret)
+ goto err;
+
+ if (!inode.bi_dir && !inode.bi_dir_offset) {
+ ret = -BCH_ERR_ENOENT_inode_no_backpointer;
+ goto err;
+ }
+
+ u32 snapshot;
+ ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
+ if (ret)
+ goto err;
+
+ struct btree_iter d_iter;
+ struct bkey_s_c_dirent d = bch2_bkey_get_iter_typed(trans, &d_iter,
+ BTREE_ID_dirents, SPOS(inode.bi_dir, inode.bi_dir_offset, snapshot),
+ 0, dirent);
+ ret = bkey_err(d.s_c);
+ if (ret)
+ goto err;
+
+ struct qstr dirent_name = bch2_dirent_get_name(d);
+ prt_bytes_reversed(path, dirent_name.name, dirent_name.len);
+
+ prt_char(path, '/');
+
+ if (d.v->d_type == DT_SUBVOL)
+ inum.subvol = le32_to_cpu(d.v->d_parent_subvol);
+ inum.inum = d.k->p.inode;
+
+ bch2_trans_iter_exit(trans, &d_iter);
+ }
+
+ if (orig_pos == path->pos)
+ prt_char(path, '/');
+
+ ret = path->allocation_failure ? -ENOMEM : 0;
+ if (ret)
+ goto err;
+
+ reverse_bytes(path->buf + orig_pos, path->pos - orig_pos);
+ return 0;
+err:
+ return ret;
+}