author    Kent Overstreet <kent.overstreet@gmail.com>    2016-05-25 20:00:21 -0800
committer Kent Overstreet <kent.overstreet@gmail.com>    2016-07-30 01:13:55 -0800
commit    ff2b145eb4392401065bf55172453564e74afda8
tree      a8aedb673de906805be6eb2606dd9b712887289a
parent    50cf040486ea853c44f57ee4b700071b0a46c002
snapshots
-rw-r--r--  drivers/md/bcache/Makefile        |    4
-rw-r--r--  drivers/md/bcache/bcache.h        |   11
-rw-r--r--  drivers/md/bcache/bkey.h          |    3
-rw-r--r--  drivers/md/bcache/bkey_methods.c  |    3
-rw-r--r--  drivers/md/bcache/bkey_methods.h  |    6
-rw-r--r--  drivers/md/bcache/blockdev.c      |   19
-rw-r--r--  drivers/md/bcache/btree_cache.c   |    1
-rw-r--r--  drivers/md/bcache/btree_iter.c    |   69
-rw-r--r--  drivers/md/bcache/btree_iter.h    |   48
-rw-r--r--  drivers/md/bcache/btree_update.c  |   61
-rw-r--r--  drivers/md/bcache/btree_update.h  |   29
-rw-r--r--  drivers/md/bcache/dirent.c        |   48
-rw-r--r--  drivers/md/bcache/dirent.h        |    2
-rw-r--r--  drivers/md/bcache/extents.c       |  146
-rw-r--r--  drivers/md/bcache/fs-gc.c         |   79
-rw-r--r--  drivers/md/bcache/fs-gc.h         |    5
-rw-r--r--  drivers/md/bcache/fs-io.c         |   76
-rw-r--r--  drivers/md/bcache/fs.c            |  260
-rw-r--r--  drivers/md/bcache/fs.h            |    4
-rw-r--r--  drivers/md/bcache/inode.c         |   47
-rw-r--r--  drivers/md/bcache/inode.h         |   17
-rw-r--r--  drivers/md/bcache/io.c            |   46
-rw-r--r--  drivers/md/bcache/io.h            |   20
-rw-r--r--  drivers/md/bcache/io_types.h      |    4
-rw-r--r--  drivers/md/bcache/journal.c       |    3
-rw-r--r--  drivers/md/bcache/migrate.c       |   12
-rw-r--r--  drivers/md/bcache/move.c          |    2
-rw-r--r--  drivers/md/bcache/movinggc.c      |    2
-rw-r--r--  drivers/md/bcache/request.c       |   36
-rw-r--r--  drivers/md/bcache/snapshot.c      |  617
-rw-r--r--  drivers/md/bcache/snapshot.h      |   82
-rw-r--r--  drivers/md/bcache/str_hash.h      |   56
-rw-r--r--  drivers/md/bcache/super.c         |   37
-rw-r--r--  drivers/md/bcache/sysfs.c         |   16
-rw-r--r--  drivers/md/bcache/tier.c          |    2
-rw-r--r--  drivers/md/bcache/writeback.c     |    3
-rw-r--r--  drivers/md/bcache/xattr.c         |   17
-rw-r--r--  include/uapi/linux/bcache.h       |   46
38 files changed, 1541 insertions(+), 398 deletions(-)
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 70119335e649..18ed1e592b2e 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -6,5 +6,5 @@ bcache-y := acl.o alloc.o bkey.o bkey_methods.o blockdev.o\
buckets.o chardev.o clock.o closure.o debug.o dirent.o error.o\
extents.o fs.o fs-gc.o fs-io.o inode.o io.o journal.o keybuf.o\
keylist.o migrate.o move.o movinggc.o notify.o opts.o request.o\
- siphash.o six.o stats.o super.o sysfs.o tier.o trace.o util.o\
- writeback.o xattr.o
+ siphash.o six.o snapshot.o stats.o super.o sysfs.o tier.o trace.o\
+ util.o writeback.o xattr.o
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 6903ab40ac6f..253f343b16bb 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -303,6 +303,9 @@
struct btree;
struct cache;
+struct snapshot;
+
+#define SNAPSHOT_NONE ((struct snapshot *) 1)
enum gc_phase {
GC_PHASE_PENDING_DELETE = BTREE_ID_NR + 1,
@@ -547,6 +550,7 @@ struct cache_set {
u8 data_replicas_have;
u8 str_hash_type;
+ u8 snapshot_str_hash_type;
} sb;
struct cache_sb disk_sb;
@@ -746,6 +750,13 @@ struct cache_set {
/* FILESYSTEM */
atomic_long_t nr_inodes;
+ /* SNAPSHOTS */
+ struct rhashtable snapshot_cache;
+ bool snapshot_cache_init_done;
+ struct list_head snapshot_handles;
+ struct mutex snapshot_lock;
+ struct snapshot *snapshot_root;
+
/* TIERING */
struct task_struct *tiering_read;
struct bch_pd_controller tiering_pd;
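
The struct snapshot definition itself lands in the new snapshot.h, which this
section does not include. A minimal sketch consistent with how the fields above
are used (hashed into snapshot_cache, linked through snapshot_handles, walked
by ancestry, and, in fs.c below, holding a VFS root dentry) might look like the
following; every field name here is an assumption:

/* Hypothetical sketch only: the real definition is in the new
 * drivers/md/bcache/snapshot.h, not shown in this section. */
struct snapshot {
	struct rhash_head	hash;	/* entry in c->snapshot_cache */
	u32			id;	/* matches bpos.snapshot in keys */
	struct snapshot		*parent;/* NULL for c->snapshot_root */
	atomic_t		ref;
	struct dentry		*root;	/* set by bch_open_root() in fs.c */
	struct list_head	list;	/* entry in c->snapshot_handles */
};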
diff --git a/drivers/md/bcache/bkey.h b/drivers/md/bcache/bkey.h
index 881c5ebea4f3..d0f18a34f0e3 100644
--- a/drivers/md/bcache/bkey.h
+++ b/drivers/md/bcache/bkey.h
@@ -507,6 +507,9 @@ BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT);
BKEY_VAL_ACCESSORS(xattr, BCH_XATTR);
+BKEY_VAL_ACCESSORS(snapshot_name, BCH_SNAPSHOT_NAME);
+BKEY_VAL_ACCESSORS(snapshot_node, BCH_SNAPSHOT_NODE);
+
/* byte order helpers */
#if !defined(__LITTLE_ENDIAN) && !defined(__BIG_ENDIAN)
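
BKEY_VAL_ACCESSORS() presumably stamps out the same typed wrappers that already
exist for inodes and dirents. By analogy with bkey_s_c_to_dirent() and
bkey_s_c_to_inode(), which this patch uses elsewhere, the generated snapshot
variants would include declarations along these lines (names inferred; the
macro expansion is not shown here):

/* Hypothetical: wrappers generated by BKEY_VAL_ACCESSORS(snapshot_name, ...),
 * inferred by analogy with the dirent/inode accessors used in this patch. */
struct bkey_s_c_snapshot_name
bkey_s_c_to_snapshot_name(struct bkey_s_c k);

struct bkey_i_snapshot_name *
bkey_i_to_snapshot_name(struct bkey_i *k);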
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index 47db7a2ba04a..545b89cb1ad4 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -6,6 +6,7 @@
#include "error.h"
#include "extents.h"
#include "inode.h"
+#include "snapshot.h"
#include "xattr.h"
static const struct bkey_ops *bch_bkey_ops[] = {
@@ -14,6 +15,8 @@ static const struct bkey_ops *bch_bkey_ops[] = {
[BKEY_TYPE_DIRENTS] = &bch_bkey_dirent_ops,
[BKEY_TYPE_XATTRS] = &bch_bkey_xattr_ops,
[BKEY_TYPE_BTREE] = &bch_bkey_btree_ops,
+ [BKEY_TYPE_SNAPSHOT_NAMES] = &bch_bkey_snapshot_name_ops,
+ [BKEY_TYPE_SNAPSHOT_TREE] = &bch_bkey_snapshot_tree_ops,
};
/* Returns string indicating reason for being invalid, or NULL if valid: */
diff --git a/drivers/md/bcache/bkey_methods.h b/drivers/md/bcache/bkey_methods.h
index 03ca92e28a29..781e785699d2 100644
--- a/drivers/md/bcache/bkey_methods.h
+++ b/drivers/md/bcache/bkey_methods.h
@@ -1,11 +1,11 @@
#ifndef _BCACHE_BKEY_METHODS_H
#define _BCACHE_BKEY_METHODS_H
-#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val,
-
enum bkey_type {
+#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val,
DEFINE_BCH_BTREE_IDS()
BKEY_TYPE_BTREE,
+#undef DEF_BTREE_ID
};
/* Type of a key in btree @id at level @level: */
@@ -53,6 +53,4 @@ void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
void bch_bkey_swab(enum bkey_type, const struct bkey_format *,
struct bkey_packed *);
-#undef DEF_BTREE_ID
-
#endif /* _BCACHE_BKEY_METHODS_H */
diff --git a/drivers/md/bcache/blockdev.c b/drivers/md/bcache/blockdev.c
index aa37e1f59062..2773f17d69cb 100644
--- a/drivers/md/bcache/blockdev.c
+++ b/drivers/md/bcache/blockdev.c
@@ -136,7 +136,7 @@ static void bcache_device_detach(struct bcache_device *d)
if (test_bit(BCACHE_DEV_DETACHING, &d->flags)) {
mutex_lock(&d->inode_lock);
- bch_inode_rm(d->c, bcache_dev_inum(d));
+ bch_inode_rm(d->c, d->c->snapshot_root, bcache_dev_inum(d));
mutex_unlock(&d->inode_lock);
}
@@ -394,6 +394,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
}
found = !bch_cached_dev_inode_find_by_uuid(c,
+ c->snapshot_root,
&dc->disk_sb.sb->disk_uuid,
&dc->disk.inode);
@@ -406,7 +407,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
(BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_STALE ||
BDEV_STATE(dc->disk_sb.sb) == BDEV_STATE_NONE)) {
found = false;
- bch_inode_rm(c, bcache_dev_inum(&dc->disk));
+ bch_inode_rm(c, c->snapshot_root, bcache_dev_inum(&dc->disk));
}
/* Deadlocks since we're called via sysfs...
@@ -427,7 +428,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
dc->disk.inode.v.i_ctime = rtime;
dc->disk.inode.v.i_mtime = rtime;
- ret = bch_inode_create(c, &dc->disk.inode.k_i,
+ ret = bch_inode_create(c, c->snapshot_root,
+ &dc->disk.inode.k_i,
0, BLOCKDEV_INODE_MAX,
&c->unused_inode_hint);
if (ret) {
@@ -444,7 +446,8 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
closure_sync(&cl);
} else {
dc->disk.inode.v.i_mtime = rtime;
- bch_inode_update(c, &dc->disk.inode.k_i, NULL);
+ bch_inode_update(c, c->snapshot_root,
+ &dc->disk.inode.k_i, NULL);
}
/* Count dirty sectors before attaching */
@@ -735,7 +738,8 @@ int bch_blockdev_volumes_start(struct cache_set *c)
if (test_bit(CACHE_SET_STOPPING, &c->flags))
return -EINVAL;
- for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
+ for_each_snapshot_key(&iter, c, c->snapshot_root,
+ BTREE_ID_INODES, POS_MIN, k) {
if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
break;
@@ -757,6 +761,7 @@ int bch_blockdev_volume_create(struct cache_set *c, u64 size)
{
__le64 rtime = cpu_to_le64(ktime_get_seconds());
struct bkey_i_inode_blockdev inode;
+ u64 hint = 0;
int ret;
bkey_inode_blockdev_init(&inode.k_i);
@@ -765,8 +770,8 @@ int bch_blockdev_volume_create(struct cache_set *c, u64 size)
inode.v.i_mtime = rtime;
inode.v.i_size = cpu_to_le64(size);
- ret = bch_inode_create(c, &inode.k_i, 0, BLOCKDEV_INODE_MAX,
- &c->unused_inode_hint);
+ ret = bch_inode_create(c, c->snapshot_root, &inode.k_i,
+ 0, BLOCKDEV_INODE_MAX, &hint);
if (ret) {
pr_err("Can't create volume: %d", ret);
return ret;
diff --git a/drivers/md/bcache/btree_cache.c b/drivers/md/bcache/btree_cache.c
index f8f4b7e1b48a..c36366cd777c 100644
--- a/drivers/md/bcache/btree_cache.c
+++ b/drivers/md/bcache/btree_cache.c
@@ -55,7 +55,6 @@ static const struct rhashtable_params bch_btree_cache_params = {
.head_offset = offsetof(struct btree, hash),
.key_offset = offsetof(struct btree, key.v),
.key_len = sizeof(struct bch_extent_ptr),
- .hashfn = jhash,
};
static void mca_data_alloc(struct cache_set *c, struct btree *b, gfp_t gfp)
diff --git a/drivers/md/bcache/btree_iter.c b/drivers/md/bcache/btree_iter.c
index 864ffa158f42..2c469ad4936a 100644
--- a/drivers/md/bcache/btree_iter.c
+++ b/drivers/md/bcache/btree_iter.c
@@ -6,6 +6,7 @@
#include "btree_locking.h"
#include "debug.h"
#include "extents.h"
+#include "snapshot.h"
#include <trace/events/bcache.h>
@@ -791,6 +792,19 @@ struct bkey_s_c bch_btree_iter_peek(struct btree_iter *iter)
}
}
+struct bkey_s_c bch_btree_iter_peek_snapshot(struct btree_iter *iter,
+ struct snapshot *snapshot)
+{
+ struct bkey_s_c k;
+
+ do
+ k = bch_btree_iter_peek(iter);
+ while (k.k &&
+ !bch_snapshot_is_descendant(iter->c, snapshot, k.k->p.snapshot));
+
+ return k;
+}
+
struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *iter)
{
struct bkey_s_c k;
@@ -847,6 +861,61 @@ recheck:
return bkey_s_c_null;
}
+struct bkey_s_c bch_btree_iter_peek_snapshot_with_holes(struct btree_iter *iter,
+ struct snapshot *snapshot)
+{
+ struct bkey_s_c k;
+ struct bkey n;
+ int ret;
+
+ while (1) {
+ ret = __bch_btree_iter_traverse(iter, 0, iter->pos);
+ if (ret)
+ return bkey_s_c_null;
+
+ k = __btree_iter_peek_all(iter);
+recheck:
+ if (!k.k || bkey_cmp(bkey_start_pos(k.k), iter->pos) > 0) {
+ /* hole */
+ bkey_init(&n);
+ n.p = iter->pos;
+
+ if (!k.k)
+ k.k = &iter->l[0].node->key.k;
+
+ if (iter->btree_id == BTREE_ID_EXTENTS) {
+ if (n.p.offset == KEY_OFFSET_MAX) {
+ iter->pos = bkey_successor(iter->pos);
+ goto recheck;
+ }
+
+ bch_key_resize(&n,
+ min_t(u64, KEY_SIZE_MAX,
+ (k.k->p.inode == n.p.inode
+ ? bkey_start_offset(k.k)
+ : KEY_OFFSET_MAX) -
+ n.p.offset));
+
+ EBUG_ON(!n.size);
+ }
+
+ iter->k = n;
+ return (struct bkey_s_c) { &iter->k, NULL };
+ } else if (!bkey_deleted(k.k)) {
+ return k;
+ } else {
+ __btree_iter_advance(iter);
+ }
+ }
+
+ EBUG_ON(!iter->error &&
+ (iter->btree_id != BTREE_ID_INODES
+ ? bkey_cmp(iter->pos, POS_MAX)
+ : iter->pos.inode != KEY_INODE_MAX));
+
+ return bkey_s_c_null;
+}
+
void __bch_btree_iter_init(struct btree_iter *iter, struct cache_set *c,
enum btree_id btree_id, struct bpos pos,
int locks_want)
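
bch_snapshot_is_descendant() is implemented in the new snapshot.c, which this
section does not include. Given the parent-pointer tree rooted at
c->snapshot_root, the visibility filter used by the two peek variants above is
presumably an ancestry walk along these lines (a sketch, not the actual
implementation):

/* Hypothetical sketch: a key written in snapshot `id` is visible to
 * `snapshot` iff `snapshot` is `id` itself or one of its descendants. */
bool bch_snapshot_is_descendant(struct cache_set *c,
				struct snapshot *snapshot, u32 id)
{
	struct snapshot *s;

	for (s = snapshot; s; s = s->parent)
		if (s->id == id)
			return true;

	return false;
}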
diff --git a/drivers/md/bcache/btree_iter.h b/drivers/md/bcache/btree_iter.h
index 0a13b7df9b12..96356558812f 100644
--- a/drivers/md/bcache/btree_iter.h
+++ b/drivers/md/bcache/btree_iter.h
@@ -144,6 +144,12 @@ struct btree *bch_btree_iter_next_node(struct btree_iter *);
struct bkey_s_c bch_btree_iter_peek(struct btree_iter *);
struct bkey_s_c bch_btree_iter_peek_with_holes(struct btree_iter *);
void bch_btree_iter_set_pos_same_leaf(struct btree_iter *, struct bpos);
+
+struct bkey_s_c bch_btree_iter_peek_snapshot(struct btree_iter *,
+ struct snapshot *);
+struct bkey_s_c bch_btree_iter_peek_snapshot_with_holes(struct btree_iter *,
+ struct snapshot *);
+
void bch_btree_iter_set_pos(struct btree_iter *, struct bpos);
void bch_btree_iter_advance_pos(struct btree_iter *);
void bch_btree_iter_rewind(struct btree_iter *, struct bpos);
@@ -205,31 +211,45 @@ static inline int btree_iter_cmp(const struct btree_iter *l,
__for_each_btree_node(_iter, _c, _btree_id, _start, _b, 0)
#define __for_each_btree_key(_iter, _c, _btree_id, _start, \
- _k, _locks_want) \
+ _k, _peek, _locks_want) \
for (__bch_btree_iter_init((_iter), (_c), (_btree_id), \
_start, _locks_want); \
- ((_k) = bch_btree_iter_peek(_iter)).k; \
- bch_btree_iter_advance_pos(_iter))
+ ((_k) = (_peek)).k; bch_btree_iter_advance_pos(_iter))
#define for_each_btree_key(_iter, _c, _btree_id, _start, _k) \
- __for_each_btree_key(_iter, _c, _btree_id, _start, _k, 0)
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek(_iter), 0)
#define for_each_btree_key_intent(_iter, _c, _btree_id, _start, _k) \
- __for_each_btree_key(_iter, _c, _btree_id, _start, _k, 1)
-
-#define __for_each_btree_key_with_holes(_iter, _c, _btree_id, \
- _start, _k, _locks_want) \
- for (__bch_btree_iter_init((_iter), (_c), (_btree_id), \
- _start, _locks_want); \
- ((_k) = bch_btree_iter_peek_with_holes(_iter)).k; \
- bch_btree_iter_advance_pos(_iter))
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek(_iter), 1)
#define for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k) \
- __for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 0)
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_with_holes(_iter), 0)
#define for_each_btree_key_with_holes_intent(_iter, _c, _btree_id, \
_start, _k) \
- __for_each_btree_key_with_holes(_iter, _c, _btree_id, _start, _k, 1)
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_with_holes(_iter), 0)
+
+#define for_each_snapshot_key(_iter, _c, _snap, _btree_id, _start, _k) \
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_snapshot(_iter, _snap), 0)
+
+#define for_each_snapshot_key_intent(_iter, _c, _snap, _btree_id, _start, _k)\
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_snapshot(_iter, _snap), 1)
+
+#define for_each_snapshot_key_with_holes(_iter, _c, _snap, \
+ _btree_id, _start, _k) \
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_snapshot_with_holes(_iter, _snap), 0)
+
+#define for_each_snapshot_key_with_holes_intent(_iter, _c, _snap, \
+ _btree_id, _start, _k) \
+ __for_each_btree_key(_iter, _c, _btree_id, _start, _k, \
+ bch_btree_iter_peek_snapshot_with_holes(_iter, _snap), 1)
/*
* Unlocks before scheduling
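
The snapshot-aware macros are drop-in replacements for the plain ones, taking
the snapshot handle as one extra argument. A hypothetical caller, mirroring how
bch_empty_dir() uses for_each_snapshot_key() in dirent.c below:

/* Hypothetical usage sketch of the new snapshot-aware iteration macro. */
static u64 count_dirents(struct cache_set *c, struct snapshot *snapshot,
			 u64 dir_inum)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	u64 nr = 0;

	for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_DIRENTS,
			      POS(dir_inum, 0), k) {
		if (k.k->p.inode != dir_inum)
			break;
		if (k.k->type == BCH_DIRENT)
			nr++;
	}

	bch_btree_iter_unlock(&iter);
	return nr;
}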
diff --git a/drivers/md/bcache/btree_update.c b/drivers/md/bcache/btree_update.c
index d6a694667d70..2d15e4d4c994 100644
--- a/drivers/md/bcache/btree_update.c
+++ b/drivers/md/bcache/btree_update.c
@@ -1586,6 +1586,8 @@ int bch_btree_insert_trans(struct btree_insert_trans *trans,
unsigned u64s;
int ret;
+ BUG_ON(!trans->snapshot);
+
closure_init_stack(&cl);
trans_for_each_entry(trans, i) {
@@ -1771,29 +1773,32 @@ err:
* -EROFS: cache set read only
* -EIO: journal or btree node IO error
*/
-int bch_btree_insert_at(struct btree_iter *iter,
+int bch_btree_insert_at(struct snapshot *snapshot,
+ struct btree_iter *iter,
struct bkey_i *insert_key,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq, unsigned flags)
{
struct btree_insert_trans m = {
- .nr = 1,
- .entries = &(struct btree_trans_entry) {
- .iter = iter,
- .k = insert_key,
- .done = false,
+ .snapshot = snapshot,
+ .nr = 1,
+ .entries = &(struct btree_trans_entry) {
+ .iter = iter,
+ .k = insert_key,
+ .done = false,
},
};
- int ret = bch_btree_insert_trans(&m, disk_res,
- hook, journal_seq, flags);
+ int ret = bch_btree_insert_trans(&m, disk_res, hook,
+ journal_seq, flags);
BUG_ON(!ret != m.entries[0].done);
return ret;
}
-int bch_btree_insert_list_at(struct btree_iter *iter,
+int bch_btree_insert_list_at(struct snapshot *snapshot,
+ struct btree_iter *iter,
struct keylist *keys,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
@@ -1809,8 +1814,9 @@ int bch_btree_insert_list_at(struct btree_iter *iter,
if (ret)
return ret;
- ret = bch_btree_insert_at(iter, bch_keylist_front(keys),
- disk_res, hook, journal_seq, flags);
+ ret = bch_btree_insert_at(snapshot, iter,
+ bch_keylist_front(keys), disk_res,
+ hook, journal_seq, flags);
if (ret)
return ret;
@@ -1832,7 +1838,8 @@ int bch_btree_insert_list_at(struct btree_iter *iter,
* -EAGAIN: @iter->cl was put on a waitlist waiting for btree node allocation
* -EINTR: btree node was changed while upgrading to write lock
*/
-int bch_btree_insert_check_key(struct btree_iter *iter,
+int bch_btree_insert_check_key(struct snapshot *snapshot,
+ struct btree_iter *iter,
struct bkey_i *check_key)
{
struct bpos saved_pos = iter->pos;
@@ -1850,8 +1857,8 @@ int bch_btree_insert_check_key(struct btree_iter *iter,
bkey_copy(&tmp.key, check_key);
- ret = bch_btree_insert_at(iter, &tmp.key, NULL, NULL,
- NULL, BTREE_INSERT_ATOMIC);
+ ret = bch_btree_insert_at(snapshot, iter, &tmp.key, NULL,
+ NULL, NULL, BTREE_INSERT_ATOMIC);
bch_btree_iter_rewind(iter, saved_pos);
@@ -1865,8 +1872,8 @@ int bch_btree_insert_check_key(struct btree_iter *iter,
* @insert_keys: list of keys to insert
* @hook: insert callback
*/
-int bch_btree_insert(struct cache_set *c, enum btree_id id,
- struct bkey_i *k,
+int bch_btree_insert(struct cache_set *c, struct snapshot *snapshot,
+ enum btree_id id, struct bkey_i *k,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq, int flags)
@@ -1880,7 +1887,7 @@ int bch_btree_insert(struct cache_set *c, enum btree_id id,
if (unlikely(ret))
goto out;
- ret = bch_btree_insert_at(&iter, k, disk_res,
+ ret = bch_btree_insert_at(snapshot, &iter, k, disk_res,
hook, journal_seq, flags);
out: ret2 = bch_btree_iter_unlock(&iter);
@@ -1891,8 +1898,8 @@ out: ret2 = bch_btree_iter_unlock(&iter);
* bch_btree_update - like bch_btree_insert(), but asserts that we're
* overwriting an existing key
*/
-int bch_btree_update(struct cache_set *c, enum btree_id id,
- struct bkey_i *k, u64 *journal_seq)
+int bch_btree_update(struct cache_set *c, struct snapshot *snapshot,
+ enum btree_id id, struct bkey_i *k, u64 *journal_seq)
{
struct btree_iter iter;
struct bkey_s_c u;
@@ -1902,12 +1909,13 @@ int bch_btree_update(struct cache_set *c, enum btree_id id,
bch_btree_iter_init_intent(&iter, c, id, k->k.p);
- u = bch_btree_iter_peek_with_holes(&iter);
+ u = bch_btree_iter_peek_snapshot_with_holes(&iter, snapshot);
if (!u.k || bkey_deleted(u.k))
return -ENOENT;
- ret = bch_btree_insert_at(&iter, k, NULL, NULL, journal_seq, 0);
+ ret = bch_btree_insert_at(snapshot, &iter, k, NULL,
+ NULL, journal_seq, 0);
ret2 = bch_btree_iter_unlock(&iter);
return ret ?: ret2;
@@ -1918,7 +1926,9 @@ int bch_btree_update(struct cache_set *c, enum btree_id id,
*
* Range is a half open interval - [start, end)
*/
-int bch_btree_delete_range(struct cache_set *c, enum btree_id id,
+int bch_btree_delete_range(struct cache_set *c,
+ struct snapshot *snapshot,
+ enum btree_id id,
struct bpos start,
struct bpos end,
u64 version,
@@ -1932,7 +1942,7 @@ int bch_btree_delete_range(struct cache_set *c, enum btree_id id,
bch_btree_iter_init_intent(&iter, c, id, start);
- while ((k = bch_btree_iter_peek(&iter)).k) {
+ while ((k = bch_btree_iter_peek_snapshot(&iter, snapshot)).k) {
unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits);
/* really shouldn't be using a bare, unpadded bkey_i */
struct bkey_i delete;
@@ -1970,8 +1980,9 @@ int bch_btree_delete_range(struct cache_set *c, enum btree_id id,
bch_cut_back(end, &delete.k);
}
- ret = bch_btree_insert_at(&iter, &delete, disk_res, hook,
- journal_seq, BTREE_INSERT_NOFAIL);
+ ret = bch_btree_insert_at(snapshot, &iter, &delete, disk_res,
+ hook, journal_seq,
+ BTREE_INSERT_NOFAIL);
if (ret)
break;
diff --git a/drivers/md/bcache/btree_update.h b/drivers/md/bcache/btree_update.h
index e8426308ade5..c11a61470d64 100644
--- a/drivers/md/bcache/btree_update.h
+++ b/drivers/md/bcache/btree_update.h
@@ -204,14 +204,8 @@ void bch_btree_insert_node(struct btree *, struct btree_iter *,
*/
#define BTREE_INSERT_NO_MARK_KEY (1 << 2)
-int bch_btree_insert_at(struct btree_iter *, struct bkey_i *,
- struct disk_reservation *,
- struct extent_insert_hook *, u64 *, unsigned);
-int bch_btree_insert_list_at(struct btree_iter *, struct keylist *,
- struct disk_reservation *,
- struct extent_insert_hook *, u64 *, unsigned);
-
struct btree_insert_trans {
+ struct snapshot *snapshot;
unsigned nr;
bool did_work;
struct btree_trans_entry {
@@ -248,15 +242,24 @@ int bch_btree_insert_trans(struct btree_insert_trans *,
struct extent_insert_hook *,
u64 *, unsigned);
-int bch_btree_insert_check_key(struct btree_iter *, struct bkey_i *);
-int bch_btree_insert(struct cache_set *, enum btree_id, struct bkey_i *,
+int bch_btree_insert_at(struct snapshot *, struct btree_iter *,
+ struct bkey_i *, struct disk_reservation *,
+ struct extent_insert_hook *, u64 *, unsigned);
+int bch_btree_insert_list_at(struct snapshot *, struct btree_iter *,
+ struct keylist *, struct disk_reservation *,
+ struct extent_insert_hook *, u64 *, unsigned);
+
+int bch_btree_insert_check_key(struct snapshot *,
+ struct btree_iter *, struct bkey_i *);
+int bch_btree_insert(struct cache_set *, struct snapshot *,
+ enum btree_id, struct bkey_i *,
struct disk_reservation *,
struct extent_insert_hook *, u64 *, int flags);
-int bch_btree_update(struct cache_set *, enum btree_id,
- struct bkey_i *, u64 *);
+int bch_btree_update(struct cache_set *, struct snapshot *,
+ enum btree_id, struct bkey_i *, u64 *);
-int bch_btree_delete_range(struct cache_set *, enum btree_id,
- struct bpos, struct bpos, u64,
+int bch_btree_delete_range(struct cache_set *, struct snapshot *,
+ enum btree_id, struct bpos, struct bpos, u64,
struct disk_reservation *,
struct extent_insert_hook *, u64 *);
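
Every insert path now names the snapshot it writes into, and
bch_btree_insert_trans() BUG()s on a NULL snapshot (see btree_update.c above).
Callers operating outside any mounted subvolume pass c->snapshot_root, as
blockdev.c does. A hypothetical minimal caller of the new signature:

/* Hypothetical sketch of the new calling convention. */
static int insert_in_root(struct cache_set *c, struct bkey_i *k)
{
	return bch_btree_insert(c, c->snapshot_root, BTREE_ID_INODES, k,
				NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
}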
diff --git a/drivers/md/bcache/dirent.c b/drivers/md/bcache/dirent.c
index 25e70cc9e7e5..26e5d51a47f6 100644
--- a/drivers/md/bcache/dirent.c
+++ b/drivers/md/bcache/dirent.c
@@ -158,7 +158,7 @@ int bch_dirent_create(struct inode *dir, u8 type,
const struct qstr *name, u64 dst_inum)
{
struct cache_set *c = dir->i_sb->s_fs_info;
- struct bch_inode_info *ei = to_bch_ei(dir);
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
struct bkey_i_dirent *dirent;
int ret;
@@ -166,9 +166,9 @@ int bch_dirent_create(struct inode *dir, u8 type,
if (!dirent)
return -ENOMEM;
- ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &ei->journal_seq,
- &dirent->k_i, 0);
+ ret = bch_hash_set(dirent_hash_desc, &dir_ei->str_hash, c,
+ dir_ei->snapshot, dir_ei->vfs_inode.i_ino,
+ &dirent->k_i, &dir_ei->journal_seq, 0);
kfree(dirent);
return ret;
@@ -227,10 +227,12 @@ int bch_dirent_rename(struct cache_set *c,
if (bkey_cmp(src_iter.pos, dst_iter.pos) < 0) {
old_src = bch_hash_lookup_at(dirent_hash_desc,
&src_ei->str_hash,
+ src_ei->snapshot,
&src_iter, src_name);
need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc,
&src_ei->str_hash,
+ src_ei->snapshot,
&whiteout_iter, &src_iter);
/*
@@ -239,23 +241,29 @@ int bch_dirent_rename(struct cache_set *c,
* to do that check for us for correctness:
*/
old_dst = mode == BCH_RENAME
- ? bch_hash_hole_at(dirent_hash_desc, &dst_iter)
+ ? bch_hash_hole_at(dirent_hash_desc,
+ dst_ei->snapshot, &dst_iter)
: bch_hash_lookup_at(dirent_hash_desc,
&dst_ei->str_hash,
+ dst_ei->snapshot,
&dst_iter, dst_name);
} else {
old_dst = mode == BCH_RENAME
- ? bch_hash_hole_at(dirent_hash_desc, &dst_iter)
+ ? bch_hash_hole_at(dirent_hash_desc,
+ dst_ei->snapshot, &dst_iter)
: bch_hash_lookup_at(dirent_hash_desc,
&dst_ei->str_hash,
+ dst_ei->snapshot,
&dst_iter, dst_name);
old_src = bch_hash_lookup_at(dirent_hash_desc,
&src_ei->str_hash,
+ src_ei->snapshot,
&src_iter, src_name);
need_whiteout = bch_hash_needs_whiteout(dirent_hash_desc,
&src_ei->str_hash,
+ src_ei->snapshot,
&whiteout_iter, &src_iter);
}
@@ -291,9 +299,9 @@ int bch_dirent_rename(struct cache_set *c,
* __dirent_find_hole() found
*/
new_dst->k.p = src_iter.pos;
- ret = bch_btree_insert_at(&src_iter,
- &new_dst->k_i, NULL, NULL,
- journal_seq,
+ ret = bch_btree_insert_at(src_ei->snapshot,
+ &src_iter, &new_dst->k_i,
+ NULL, NULL, journal_seq,
BTREE_INSERT_ATOMIC);
goto insert_done;
}
@@ -327,6 +335,7 @@ int bch_dirent_rename(struct cache_set *c,
new_src->k.p = src_iter.pos;
new_dst->k.p = dst_iter.pos;
ret = bch_btree_insert_trans(&(struct btree_insert_trans) {
+ .snapshot = src_ei->snapshot,
.nr = 2,
.entries = (struct btree_trans_entry[]) {
{ &src_iter, &new_src->k_i, },
@@ -362,11 +371,11 @@ err_unlock:
int bch_dirent_delete(struct inode *dir, const struct qstr *name)
{
struct cache_set *c = dir->i_sb->s_fs_info;
- struct bch_inode_info *ei = to_bch_ei(dir);
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
- return bch_hash_delete(dirent_hash_desc, &ei->str_hash,
- c, ei->vfs_inode.i_ino,
- &ei->journal_seq, name);
+ return bch_hash_delete(dirent_hash_desc, &dir_ei->str_hash,
+ c, dir_ei->snapshot, dir_ei->vfs_inode.i_ino,
+ &dir_ei->journal_seq, name);
}
u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name)
@@ -378,7 +387,8 @@ u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name)
u64 inum;
k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &iter, name);
+ ei->snapshot, ei->vfs_inode.i_ino,
+ &iter, name);
if (IS_ERR(k.k)) {
bch_btree_iter_unlock(&iter);
return 0;
@@ -390,13 +400,14 @@ u64 bch_dirent_lookup(struct inode *dir, const struct qstr *name)
return inum;
}
-int bch_empty_dir(struct cache_set *c, u64 dir_inum)
+int bch_empty_dir(struct cache_set *c, struct snapshot *snapshot, u64 dir_inum)
{
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(dir_inum, 0), k) {
+ for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_DIRENTS,
+ POS(dir_inum, 0), k) {
if (k.k->p.inode > dir_inum)
break;
@@ -413,6 +424,7 @@ int bch_empty_dir(struct cache_set *c, u64 dir_inum)
int bch_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct super_block *sb = inode->i_sb;
struct cache_set *c = sb->s_fs_info;
struct btree_iter iter;
@@ -425,8 +437,8 @@ int bch_readdir(struct file *file, struct dir_context *ctx)
pr_debug("listing for %lu from %llu", inode->i_ino, ctx->pos);
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
- POS(inode->i_ino, ctx->pos), k) {
+ for_each_snapshot_key(&iter, c, ei->snapshot, BTREE_ID_DIRENTS,
+ POS(inode->i_ino, ctx->pos), k) {
if (k.k->type != BCH_DIRENT)
continue;
diff --git a/drivers/md/bcache/dirent.h b/drivers/md/bcache/dirent.h
index 63b4aa07f432..4b312deeee56 100644
--- a/drivers/md/bcache/dirent.h
+++ b/drivers/md/bcache/dirent.h
@@ -24,7 +24,7 @@ int bch_dirent_rename(struct cache_set *,
u64 *, enum bch_rename_mode);
u64 bch_dirent_lookup(struct inode *, const struct qstr *);
-int bch_empty_dir(struct cache_set *, u64);
+int bch_empty_dir(struct cache_set *, struct snapshot *, u64);
int bch_readdir(struct file *, struct dir_context *);
#endif /* _BCACHE_DIRENT_H */
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index af6159cad808..817346a2db13 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -15,6 +15,7 @@
#include "extents.h"
#include "inode.h"
#include "journal.h"
+#include "snapshot.h"
#include "super.h"
#include "writeback.h"
#include "xattr.h"
@@ -1179,6 +1180,11 @@ extent_insert_advance_pos(struct btree_insert_trans *trans,
k, res, flags, stats);
}
+static bool extent_should_overwrite(struct bkey *new, struct bkey *old)
+{
+ return new->p.snapshot == old->p.snapshot;
+}
+
/**
* bch_extent_insert_fixup - insert a new extent and deal with overlaps
*
@@ -1240,6 +1246,7 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
u64 start_time = local_clock();
enum btree_insert_ret ret = BTREE_INSERT_OK;
struct bpos committed_pos = iter->pos;
+ BKEY_PADDED(k) split;
EBUG_ON(iter->level);
EBUG_ON(bkey_deleted(&insert->k->k) || !insert->k->k.size);
@@ -1272,6 +1279,14 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
continue;
}
+ /*
+ * bkey_start_pos(k.k) not monotonically increasing except for
+ * ancestors of a given snapshot with nonzero size:
+ */
+ if (!bch_snapshot_is_descendant(c, trans->snapshot,
+ k.k->p.snapshot))
+ continue;
+
if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0)
break;
@@ -1305,69 +1320,86 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
goto stop;
}
- /* k is the key currently in the tree, 'insert' is the new key */
- switch (overlap) {
- case BCH_EXTENT_OVERLAP_FRONT:
- /* insert overlaps with start of k: */
- bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
- BUG_ON(bkey_deleted(k.k));
- extent_save(&b->keys, node_iter, _k, k.k);
- break;
+ if (!extent_should_overwrite(&insert->k->k, k.k)) {
+ if (bkey_cmp(bkey_start_pos(k.k),
+ bkey_start_pos(&insert->k->k)) < 0) {
+ bkey_reassemble(&split.k, k.s_c);
+ bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
- case BCH_EXTENT_OVERLAP_BACK:
- /* insert overlaps with end of k: */
- bch_cut_subtract_back(iter,
- bkey_start_pos(&insert->k->k),
- k, &stats);
- BUG_ON(bkey_deleted(k.k));
- extent_save(&b->keys, node_iter, _k, k.k);
+ __bch_cut_front(bkey_start_pos(&insert->k->k), k);
+ extent_save(_k, k.k, f);
- /*
- * As the auxiliary tree is indexed by the end of the
- * key and we've just changed the end, update the
- * auxiliary tree.
- */
- bch_bset_fix_invalidated_key(&b->keys, _k);
- bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
- break;
+ bch_btree_bset_insert(iter, b, node_iter, &split.k);
+ }
- case BCH_EXTENT_OVERLAP_ALL:
- /* The insert key completely covers k, invalidate k */
- if (!bkey_deleted(_k))
- btree_keys_account_key_drop(&b->keys.nr, _k);
+ if (bkey_cmp(k.k->p, insert->k->k.p) < 0) {
+ extent_insert_committed(trans, insert, res, flags, &stats);
+ /* XXX prevent back merges on next insert */
+ }
+ } else {
+ /* k is the key currently in the tree, 'insert' is the new key */
+ switch (overlap) {
+ case BCH_EXTENT_OVERLAP_FRONT:
+ /* insert overlaps with start of k: */
+ bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
+ break;
- bch_drop_subtract(iter, k, &stats);
- extent_save(&b->keys, node_iter, _k, k.k);
- break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ /* insert overlaps with end of k: */
+ bch_cut_subtract_back(iter,
+ bkey_start_pos(&insert->k->k),
+ k, &stats);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
+
+ /*
+ * As the auxiliary tree is indexed by the end of the
+ * key and we've just changed the end, update the
+ * auxiliary tree.
+ */
+ bch_bset_fix_invalidated_key(&b->keys, _k);
+ bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
+ break;
- case BCH_EXTENT_OVERLAP_MIDDLE: {
- BKEY_PADDED(k) split;
- /*
- * The insert key falls 'in the middle' of k
- * The insert key splits k in 3:
- * - start only in k, preserve
- * - middle common section, invalidate in k
- * - end only in k, preserve
- *
- * We update the old key to preserve the start,
- * insert will be the new common section,
- * we manually insert the end that we are preserving.
- *
- * modify k _before_ doing the insert (which will move
- * what k points to)
- */
- bkey_reassemble(&split.k, k.s_c);
- bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
- BUG_ON(bkey_deleted(&split.k.k));
+ case BCH_EXTENT_OVERLAP_ALL:
+ /* The insert key completely covers k, invalidate k */
+ if (!bkey_deleted(_k))
+ btree_keys_account_key_drop(&b->keys.nr, _k);
- __bch_cut_front(bkey_start_pos(&insert->k->k), k);
- bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
- BUG_ON(bkey_deleted(k.k));
- extent_save(&b->keys, node_iter, _k, k.k);
+ bch_drop_subtract(iter, k, &stats);
+ extent_save(&b->keys, node_iter, _k, k.k);
+ break;
- bch_btree_bset_insert(iter, b, node_iter, &split.k);
- break;
- }
+ case BCH_EXTENT_OVERLAP_MIDDLE: {
+ BKEY_PADDED(k) split;
+ /*
+ * The insert key falls 'in the middle' of k
+ * The insert key splits k in 3:
+ * - start only in k, preserve
+ * - middle common section, invalidate in k
+ * - end only in k, preserve
+ *
+ * We update the old key to preserve the start,
+ * insert will be the new common section,
+ * we manually insert the end that we are preserving.
+ *
+ * modify k _before_ doing the insert (which will move
+ * what k points to)
+ */
+ bkey_reassemble(&split.k, k.s_c);
+ bch_cut_back(bkey_start_pos(&insert->k->k), &split.k.k);
+ BUG_ON(bkey_deleted(&split.k.k));
+
+ __bch_cut_front(bkey_start_pos(&insert->k->k), k);
+ bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
+ BUG_ON(bkey_deleted(k.k));
+ extent_save(&b->keys, node_iter, _k, k.k);
+
+ bch_btree_bset_insert(iter, b, node_iter, &split.k);
+ break;
+ }
}
}
@@ -2033,4 +2065,6 @@ const struct btree_keys_ops *bch_btree_ops[] = {
[BTREE_ID_INODES] = &bch_inode_ops,
[BTREE_ID_DIRENTS] = &bch_dirent_ops,
[BTREE_ID_XATTRS] = &bch_xattr_ops,
+ [BTREE_ID_SNAPSHOT_NAMES] = &bch_snapshot_name_ops,
+ [BTREE_ID_SNAPSHOT_TREE] = &bch_snapshot_tree_ops,
};
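
The new !extent_should_overwrite() branch is the copy-on-write core of
snapshots: an extent belonging to a different snapshot is never destroyed by an
insert. At most its non-overlapping front is split off and reinserted, the old
key is trimmed in place, and read-side filtering in
bch_btree_iter_peek_snapshot() keeps each snapshot seeing only its own data. A
simplified, hypothetical model of that split for the front-overlap case:

/* Hypothetical, simplified model of the foreign-snapshot split above.
 * Extents are half-open sector ranges, e.g. old [0,100) vs insert [40,60). */
struct range { u64 start, end; };

static void cow_split(struct range old, struct range ins,
		      struct range *front, struct range *rest)
{
	*front = (struct range) { old.start, ins.start }; /* reinserted whole */
	*rest  = (struct range) { ins.start, old.end };   /* old key, trimmed */
}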
diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c
index b54105aa0521..9cbc0e014fa5 100644
--- a/drivers/md/bcache/fs-gc.c
+++ b/drivers/md/bcache/fs-gc.c
@@ -7,6 +7,7 @@
#include "fs-gc.h"
#include "inode.h"
#include "keylist.h"
+#include "snapshot.h"
#include "super.h"
#include <linux/generic-radix-tree.h>
@@ -45,7 +46,8 @@ static void inc_link(struct nlinks *links,
*/
noinline_for_stack
-static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
+static int bch_gc_walk_dirents(struct cache_set *c, struct snapshot *snapshot,
+ struct nlinks *links,
u64 range_start, u64 *range_end)
{
struct btree_iter iter;
@@ -55,7 +57,8 @@ static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
inc_link(links, range_start, range_end, BCACHE_ROOT_INO, 2, false);
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, k) {
+ for_each_snapshot_key(&iter, c, snapshot,
+ BTREE_ID_DIRENTS, POS_MIN, k) {
switch (k.k->type) {
case BCH_DIRENT:
d = bkey_s_c_to_dirent(k);
@@ -79,24 +82,30 @@ static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
return bch_btree_iter_unlock(&iter);
}
-s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
+static s64 bch_count_inode_sectors(struct cache_set *c,
+ struct snapshot *snapshot,
+ u64 inum)
{
struct btree_iter iter;
struct bkey_s_c k;
u64 sectors = 0;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(inum, 0), k) {
+ for_each_snapshot_key(&iter, c, snapshot,
+ BTREE_ID_EXTENTS, POS(inum, 0), k) {
if (k.k->p.inode != inum)
break;
if (bkey_extent_is_allocation(k.k))
- sectors += k.k->size;
+ sectors += k.k->p.offset -
+ max(bkey_start_offset(k.k),
+ iter.pos.offset);
}
return bch_btree_iter_unlock(&iter) ?: sectors;
}
-static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
+static int bch_gc_do_inode(struct cache_set *c, struct snapshot *snapshot,
+ struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
{
struct bkey_i_inode update;
@@ -114,14 +123,14 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
if (!link.count) {
cache_set_inconsistent_on(S_ISDIR(i_mode) &&
- bch_empty_dir(c, inode.k->p.inode), c,
+ bch_empty_dir(c, snapshot, inode.k->p.inode), c,
"non empty directory with link count 0,inode nlink %u, dir links found %u",
i_nlink, link.dir_count);
if (c->opts.verbose_recovery)
bch_info(c, "deleting inum %llu", inode.k->p.inode);
- return bch_inode_rm(c, inode.k->p.inode);
+ return bch_inode_rm(c, snapshot, inode.k->p.inode);
}
if (i_flags & BCH_INODE_I_SIZE_DIRTY) {
@@ -133,7 +142,7 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
* just switch units to bytes and that issue goes away
*/
- ret = bch_inode_truncate(c, inode.k->p.inode,
+ ret = bch_inode_truncate(c, snapshot, inode.k->p.inode,
round_up(i_size, PAGE_SIZE) >> 9,
NULL, NULL);
if (ret)
@@ -150,7 +159,8 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
if (c->opts.verbose_recovery)
bch_info(c, "recounting sectors for inode %llu", inode.k->p.inode);
- i_sectors = bch_count_inode_sectors(c, inode.k->p.inode);
+ i_sectors = bch_count_inode_sectors(c, snapshot,
+ inode.k->p.inode);
if (i_sectors < 0)
return i_sectors;
}
@@ -172,15 +182,17 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
if (i_flags & BCH_INODE_I_SECTORS_DIRTY)
update.v.i_sectors = cpu_to_le64(i_sectors);
- return bch_btree_insert_at(iter, &update.k_i, NULL, NULL,
- NULL, BTREE_INSERT_NOFAIL);
+ return bch_btree_insert_at(snapshot, iter, &update.k_i,
+ NULL, NULL, NULL,
+ BTREE_INSERT_NOFAIL);
}
return 0;
}
noinline_for_stack
-static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links,
+static int bch_gc_walk_inodes(struct cache_set *c, struct snapshot *snapshot,
+ struct nlinks *links,
u64 range_start, u64 range_end)
{
struct btree_iter iter;
@@ -191,7 +203,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links,
bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(range_start, 0));
- while ((k = bch_btree_iter_peek(&iter)).k) {
+ while ((k = bch_btree_iter_peek_snapshot(&iter, snapshot)).k) {
if (k.k->p.inode >= range_end)
break;
@@ -213,7 +225,7 @@ static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links,
*/
bch_btree_iter_unlock(&iter);
- ret = bch_gc_do_inode(c, &iter,
+ ret = bch_gc_do_inode(c, snapshot, &iter,
bkey_s_c_to_inode(k),
*link);
if (ret == -EINTR)
@@ -240,7 +252,7 @@ out:
return bch_btree_iter_unlock(&iter) ?: ret;
}
-int bch_gc_inode_nlinks(struct cache_set *c)
+int bch_gc_inode_nlinks(struct cache_set *c, struct snapshot *snapshot)
{
struct nlinks links;
u64 this_iter_range_start, next_iter_range_start = 0;
@@ -252,13 +264,13 @@ int bch_gc_inode_nlinks(struct cache_set *c)
this_iter_range_start = next_iter_range_start;
next_iter_range_start = U64_MAX;
- ret = bch_gc_walk_dirents(c, &links,
+ ret = bch_gc_walk_dirents(c, snapshot, &links,
this_iter_range_start,
&next_iter_range_start);
if (ret)
break;
- ret = bch_gc_walk_inodes(c, &links,
+ ret = bch_gc_walk_inodes(c, snapshot, &links,
this_iter_range_start,
next_iter_range_start);
if (ret)
@@ -272,7 +284,8 @@ int bch_gc_inode_nlinks(struct cache_set *c)
return ret;
}
-static inline bool next_inode(struct cache_set *c, struct bkey_s_c k,
+static inline bool next_inode(struct cache_set *c, struct snapshot *snapshot,
+ struct bkey_s_c k,
u64 *cur_inum,
struct bkey_i_inode *inode,
struct bch_inode **bi,
@@ -281,7 +294,7 @@ static inline bool next_inode(struct cache_set *c, struct bkey_s_c k,
if (k.k->p.inode == *cur_inum)
return false;
- if (!bch_inode_find_by_inum(c, k.k->p.inode, inode)) {
+ if (!bch_inode_find_by_inum(c, snapshot, k.k->p.inode, inode)) {
*i_mode = le16_to_cpu(inode->v.i_mode);
*i_size = le64_to_cpu(inode->v.i_size);
*bi = &inode->v;
@@ -302,7 +315,7 @@ do { \
* Checks for inconsistencies that shouldn't happen, unless we have a bug.
* Doesn't fix them yet, mainly because they haven't yet been observed:
*/
-void bch_fsck(struct cache_set *c)
+void bch_fsck(struct cache_set *c, struct snapshot *snapshot)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -314,16 +327,16 @@ void bch_fsck(struct cache_set *c)
char buf[100];
cur_inum = -1;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
+ for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_EXTENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
if (k.k->type == KEY_TYPE_DISCARD)
continue;
- if (next_inode(c, k, &cur_inum, &inode, &bi,
- &i_size, &i_mode) &&
+ if (next_inode(c, snapshot, k, &cur_inum, &inode,
+ &bi, &i_size, &i_mode) &&
bi &&
!(le32_to_cpu(bi->i_flags) & BCH_INODE_I_SECTORS_DIRTY)) {
- u64 i_sectors = bch_count_inode_sectors(c, cur_inum);
+ u64 i_sectors = bch_count_inode_sectors(c, snapshot, cur_inum);
if (i_sectors != le64_to_cpu(bi->i_sectors))
fsck_err(c,
@@ -349,9 +362,10 @@ void bch_fsck(struct cache_set *c)
bch_btree_iter_unlock(&iter);
cur_inum = -1;
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
- next_inode(c, k, &cur_inum, &inode, &bi, &i_size, &i_mode);
+ for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_DIRENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ next_inode(c, snapshot, k, &cur_inum, &inode,
+ &bi, &i_size, &i_mode);
if (!bi)
fsck_err(c, "dirent for missing inode %llu", k.k->p.inode);
@@ -364,9 +378,10 @@ void bch_fsck(struct cache_set *c)
bch_btree_iter_unlock(&iter);
cur_inum = -1;
- for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
- POS(BCACHE_ROOT_INO, 0), k) {
- next_inode(c, k, &cur_inum, &inode, &bi, &i_size, &i_mode);
+ for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_XATTRS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ next_inode(c, snapshot, k, &cur_inum, &inode,
+ &bi, &i_size, &i_mode);
if (!bi)
fsck_err(c, "xattr for missing inode %llu",
diff --git a/drivers/md/bcache/fs-gc.h b/drivers/md/bcache/fs-gc.h
index 04f08978af3a..0f1bf95424ee 100644
--- a/drivers/md/bcache/fs-gc.h
+++ b/drivers/md/bcache/fs-gc.h
@@ -1,8 +1,7 @@
#ifndef _BCACHE_FS_GC_H
#define _BCACHE_FS_GC_H
-s64 bch_count_inode_sectors(struct cache_set *, u64);
-int bch_gc_inode_nlinks(struct cache_set *);
-void bch_fsck(struct cache_set *);
+int bch_gc_inode_nlinks(struct cache_set *, struct snapshot *);
+void bch_fsck(struct cache_set *, struct snapshot *);
#endif /* _BCACHE_FS_GC_H */
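
The reworked accounting in bch_count_inode_sectors() above no longer trusts
k.k->size: with snapshots, the iterator can return an ancestor's extent that
overlaps a range already counted, so only the tail past the current iterator
position is added. A worked example of the clamp (hypothetical helper; the
patch open-codes it with max()):

/* Hypothetical helper showing the clamped accounting above. */
static u64 counted_sectors(u64 k_start, u64 k_end, u64 iter_pos)
{
	u64 start = k_start > iter_pos ? k_start : iter_pos;

	return k_end - start;	/* e.g. extent [10,50) at pos 30 counts 20 */
}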
diff --git a/drivers/md/bcache/fs-io.c b/drivers/md/bcache/fs-io.c
index fe557c77176d..60bd34131dd0 100644
--- a/drivers/md/bcache/fs-io.c
+++ b/drivers/md/bcache/fs-io.c
@@ -297,7 +297,8 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
if (hook.need_inode_update) {
struct btree_insert_trans trans = {
- .nr = 2,
+ .snapshot = op->ei->snapshot,
+ .nr = 2,
.entries = (struct btree_trans_entry[]) {
{ .iter = &extent_iter, .k = k },
{ .iter = &inode_iter, .k = &hook.new_inode.k_i },
@@ -309,7 +310,8 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
if (ret)
break;
- inode = bch_btree_iter_peek_with_holes(&inode_iter);
+ inode = bch_btree_iter_peek_snapshot_with_holes(&inode_iter,
+ op->ei->snapshot);
if (WARN_ONCE(!inode.k ||
inode.k->type != BCH_INODE_FS,
@@ -327,7 +329,8 @@ static int bchfs_write_index_update(struct bch_write_op *wop)
BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC);
} else {
- ret = bch_btree_insert_at(&extent_iter, k,
+ ret = bch_btree_insert_at(op->ei->snapshot,
+ &extent_iter, k,
&wop->res, &hook.hook,
op_journal_seq(wop),
BTREE_INSERT_NOFAIL|
@@ -610,12 +613,14 @@ static void bch_add_page_sectors(struct bio *bio, const struct bkey *k)
}
}
-static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode)
+static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio,
+ struct bch_inode_info *ei)
{
struct bio *bio = &rbio->bio;
struct btree_iter iter;
struct bkey_s_c k;
struct bio_vec *bv;
+ u64 inode = ei->vfs_inode.i_ino;
unsigned i;
bch_increment_clock(c, bio_sectors(bio), READ);
@@ -640,8 +645,9 @@ static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode
s->sectors = 0;
}
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
- POS(inode, bio->bi_iter.bi_sector), k) {
+ for_each_snapshot_key_with_holes(&iter, c, ei->snapshot,
+ BTREE_ID_EXTENTS,
+ POS(inode, bio->bi_iter.bi_sector), k) {
BKEY_PADDED(k) tmp;
struct extent_pick_ptr pick;
unsigned bytes, sectors;
@@ -675,7 +681,7 @@ static void bchfs_read(struct cache_set *c, struct bch_read_bio *rbio, u64 inode
PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
c->prio_clock[READ].hand;
- bch_read_extent(c, rbio, k, &pick,
+ bch_read_extent(c, ei->snapshot, rbio, k, &pick,
BCH_READ_RETRY_IF_STALE|
BCH_READ_PROMOTE|
(is_last ? BCH_READ_IS_LAST : 0));
@@ -728,14 +734,14 @@ again:
}
if (bch_bio_add_page(&rbio->bio, page)) {
- bchfs_read(c, rbio, inode->i_ino);
+ bchfs_read(c, rbio, to_bch_ei(inode));
rbio = NULL;
goto again;
}
}
if (rbio)
- bchfs_read(c, rbio, inode->i_ino);
+ bchfs_read(c, rbio, to_bch_ei(inode));
if (current->pagecache_lock != &mapping->add_lock)
pagecache_add_put(&mapping->add_lock);
@@ -758,7 +764,7 @@ int bch_readpage(struct file *file, struct page *page)
rbio->bio.bi_end_io = bch_readpages_end_io;
bch_bio_add_page(&rbio->bio, page);
- bchfs_read(c, rbio, inode->i_ino);
+ bchfs_read(c, rbio, to_bch_ei(inode));
return 0;
}
@@ -863,9 +869,9 @@ alloc_io:
w->io->op.ei = ei;
w->io->op.sectors_added = 0;
w->io->op.is_dio = false;
- bch_write_op_init(&w->io->op.op, w->c, &w->io->bio,
- (struct disk_reservation) { 0 }, NULL,
- bkey_to_s_c(&KEY(w->inum, 0, 0)),
+ bch_write_op_init(&w->io->op.op, w->c, ei->snapshot,
+ &w->io->bio, (struct disk_reservation) { 0 },
+ NULL, bkey_to_s_c(&KEY(w->inum, 0, 0)),
NULL, &ei->journal_seq, 0);
w->io->op.op.index_update_fn = bchfs_write_index_update;
}
@@ -1003,7 +1009,7 @@ static int bch_read_single_page(struct page *page,
rbio->bio.bi_end_io = bch_read_single_page_end_io;
bch_bio_add_page(&rbio->bio, page);
- bchfs_read(c, rbio, inode->i_ino);
+ bchfs_read(c, rbio, to_bch_ei(inode));
wait_for_completion(&done);
ret = rbio->bio.bi_error;
@@ -1154,6 +1160,7 @@ static int bch_direct_IO_read(struct cache_set *c, struct kiocb *req,
struct file *file, struct inode *inode,
struct iov_iter *iter, loff_t offset)
{
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct dio_read *dio;
struct bio *bio;
bool sync = is_sync_kiocb(req);
@@ -1221,7 +1228,8 @@ start:
if (iter->count)
closure_get(&dio->cl);
- bch_read(c, container_of(bio,
+ bch_read(c, ei->snapshot,
+ container_of(bio,
struct bch_read_bio, bio),
inode->i_ino);
}
@@ -1312,7 +1320,7 @@ static void bch_do_direct_IO_write(struct dio_write *dio)
dio->iop.sectors_added = 0;
dio->iop.is_dio = true;
dio->iop.new_i_size = U64_MAX;
- bch_write_op_init(&dio->iop.op, dio->c, &dio->bio,
+ bch_write_op_init(&dio->iop.op, dio->c, ei->snapshot, &dio->bio,
(struct disk_reservation) {
.sectors = bio_sectors(bio),
.gen = dio->res.gen
@@ -1638,6 +1646,7 @@ static int __bch_truncate_page(struct address_space *mapping,
pgoff_t index, loff_t start, loff_t end)
{
struct inode *inode = mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
unsigned start_offset = start & (PAGE_SIZE - 1);
unsigned end_offset = ((end - 1) & (PAGE_SIZE - 1)) + 1;
@@ -1662,9 +1671,9 @@ static int __bch_truncate_page(struct address_space *mapping,
* XXX: we're doing two index lookups when we end up reading the
* page
*/
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino,
- index << (PAGE_SHIFT - 9)), k) {
+ for_each_snapshot_key(&iter, c, ei->snapshot, BTREE_ID_EXTENTS,
+ POS(inode->i_ino,
+ index << (PAGE_SHIFT - 9)), k) {
if (bkey_cmp(bkey_start_pos(k.k),
POS(inode->i_ino,
(index + 1) << (PAGE_SHIFT - 9))) >= 0)
@@ -1772,7 +1781,7 @@ int bch_truncate(struct inode *inode, struct iattr *iattr)
goto err;
}
- ret = bch_inode_truncate(c, inode->i_ino,
+ ret = bch_inode_truncate(c, ei->snapshot, inode->i_ino,
round_up(iattr->ia_size, PAGE_SIZE) >> 9,
&i_sectors_hook.hook,
&ei->journal_seq);
@@ -1844,7 +1853,7 @@ static long bch_fpunch(struct inode *inode, loff_t offset, loff_t len)
if (unlikely(ret))
goto out;
- ret = bch_discard(c,
+ ret = bch_discard(c, ei->snapshot,
POS(ino, discard_start),
POS(ino, discard_end),
0,
@@ -1925,7 +1934,7 @@ static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len)
if (ret)
goto err_unwind;
- k = bch_btree_iter_peek_with_holes(&src);
+ k = bch_btree_iter_peek_snapshot_with_holes(&src, ei->snapshot);
if (!k.k) {
ret = -EIO;
goto err_unwind;
@@ -1945,7 +1954,8 @@ static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len)
BCH_DISK_RESERVATION_NOFAIL);
BUG_ON(ret);
- ret = bch_btree_insert_at(&dst, &copy.k, &disk_res,
+ ret = bch_btree_insert_at(ei->snapshot, &dst, &copy.k,
+ &disk_res,
&i_sectors_hook.hook,
&ei->journal_seq,
BTREE_INSERT_ATOMIC|
@@ -1961,7 +1971,7 @@ static long bch_fcollapse(struct inode *inode, loff_t offset, loff_t len)
bch_btree_iter_unlock(&src);
bch_btree_iter_unlock(&dst);
- ret = bch_inode_truncate(c, inode->i_ino,
+ ret = bch_inode_truncate(c, ei->snapshot, inode->i_ino,
round_up(new_size, PAGE_SIZE) >> 9,
&i_sectors_hook.hook,
&ei->journal_seq);
@@ -2056,7 +2066,8 @@ static long bch_fallocate(struct inode *inode, int mode,
while (bkey_cmp(iter.pos, end) < 0) {
struct disk_reservation disk_res = { 0 };
- k = bch_btree_iter_peek_with_holes(&iter);
+ k = bch_btree_iter_peek_snapshot_with_holes(&iter,
+ ei->snapshot);
if (!k.k) {
ret = bch_btree_iter_unlock(&iter) ?: -EIO;
goto err_put_sectors_dirty;
@@ -2093,8 +2104,8 @@ static long bch_fallocate(struct inode *inode, int mode,
goto err_put_sectors_dirty;
}
- ret = bch_btree_insert_at(&iter, &reservation, &disk_res,
- &i_sectors_hook.hook,
+ ret = bch_btree_insert_at(ei->snapshot, &iter, &reservation,
+ &disk_res, &i_sectors_hook.hook,
&ei->journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
@@ -2204,6 +2215,7 @@ static loff_t bch_next_pagecache_data(struct inode *inode,
static loff_t bch_seek_data(struct file *file, u64 offset)
{
struct inode *inode = file->f_mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
struct btree_iter iter;
struct bkey_s_c k;
@@ -2214,8 +2226,8 @@ static loff_t bch_seek_data(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino, offset >> 9), k) {
+ for_each_snapshot_key(&iter, c, ei->snapshot, BTREE_ID_EXTENTS,
+ POS(inode->i_ino, offset >> 9), k) {
if (k.k->p.inode != inode->i_ino) {
break;
} else if (bkey_extent_is_data(k.k)) {
@@ -2273,6 +2285,7 @@ static loff_t bch_next_pagecache_hole(struct inode *inode,
static loff_t bch_seek_hole(struct file *file, u64 offset)
{
struct inode *inode = file->f_mapping->host;
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
struct btree_iter iter;
struct bkey_s_c k;
@@ -2283,8 +2296,9 @@ static loff_t bch_seek_hole(struct file *file, u64 offset)
if (offset >= isize)
return -ENXIO;
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino, offset >> 9), k) {
+ for_each_snapshot_key_with_holes(&iter, c, ei->snapshot,
+ BTREE_ID_EXTENTS,
+ POS(inode->i_ino, offset >> 9), k) {
if (k.k->p.inode != inode->i_ino) {
next_hole = bch_next_pagecache_hole(inode,
offset, MAX_LFS_FILESIZE);
diff --git a/drivers/md/bcache/fs.c b/drivers/md/bcache/fs.c
index e491ed3bc385..76598d7b009f 100644
--- a/drivers/md/bcache/fs.c
+++ b/drivers/md/bcache/fs.c
@@ -4,6 +4,7 @@
#include "btree_update.h"
#include "buckets.h"
#include "dirent.h"
+#include "error.h"
#include "extents.h"
#include "fs.h"
#include "fs-gc.h"
@@ -11,12 +12,14 @@
#include "inode.h"
#include "journal.h"
#include "keylist.h"
+#include "snapshot.h"
#include "super.h"
#include "xattr.h"
#include <linux/aio.h>
#include <linux/backing-dev.h>
#include <linux/compat.h>
+#include <linux/jhash.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/random.h>
@@ -72,7 +75,8 @@ int __must_check __bch_write_inode(struct cache_set *c,
bch_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(inum, 0));
do {
- struct bkey_s_c k = bch_btree_iter_peek_with_holes(&iter);
+ struct bkey_s_c k =
+ bch_btree_iter_peek_snapshot_with_holes(&iter, ei->snapshot);
if (WARN_ONCE(!k.k || k.k->type != BCH_INODE_FS,
"inode %llu not found when updating", inum)) {
@@ -98,7 +102,7 @@ int __must_check __bch_write_inode(struct cache_set *c,
bi->i_mtime = cpu_to_le64(timespec_to_ns(&inode->i_mtime));
bi->i_ctime = cpu_to_le64(timespec_to_ns(&inode->i_ctime));
- ret = bch_btree_insert_at(&iter, &new_inode.k_i,
+ ret = bch_btree_insert_at(ei->snapshot, &iter, &new_inode.k_i,
NULL, NULL, &ei->journal_seq,
BTREE_INSERT_ATOMIC|
BTREE_INSERT_NOFAIL);
@@ -120,24 +124,58 @@ int __must_check bch_write_inode(struct cache_set *c,
return __bch_write_inode(c, ei, NULL, NULL);
}
-static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
+struct bch_inode_key {
+ struct snapshot *snapshot;
+ u64 inum;
+};
+
+static int bch_vfs_inode_test(struct inode *inode, void *_key)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct bch_inode_key *key = _key;
+
+ return ei->snapshot == key->snapshot &&
+ inode->i_ino == key->inum;
+}
+
+static int bch_vfs_inode_set(struct inode *inode, void *_key)
+{
+ struct bch_inode_info *ei = to_bch_ei(inode);
+ struct bch_inode_key *key = _key;
+
+ ei->snapshot = key->snapshot;
+ inode->i_ino = key->inum;
+ return 0;
+}
+
+static unsigned long bch_vfs_inode_hash(struct bch_inode_key *key)
+{
+ return jhash2((void *) key, sizeof(*key) / sizeof(u32), 0);
+}
+
+static struct inode *bch_vfs_inode_get(struct super_block *sb,
+ struct snapshot *snapshot,
+ u64 inum)
{
struct cache_set *c = sb->s_fs_info;
struct inode *inode;
struct btree_iter iter;
struct bkey_s_c k;
+ struct bch_inode_key key = { snapshot, inum };
int ret;
- pr_debug("inum %llu", inum);
-
- inode = iget_locked(sb, inum);
+ inode = iget5_locked(sb,
+ bch_vfs_inode_hash(&key),
+ bch_vfs_inode_test,
+ bch_vfs_inode_set,
+ &key);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
+ if (likely(!(inode->i_state & I_NEW)))
return inode;
bch_btree_iter_init(&iter, c, BTREE_ID_INODES, POS(inum, 0));
- k = bch_btree_iter_peek_with_holes(&iter);
+ k = bch_btree_iter_peek_snapshot_with_holes(&iter, snapshot);
if (!k.k || k.k->type != BCH_INODE_FS) {
ret = bch_btree_iter_unlock(&iter);
iget_failed(inode);
@@ -153,7 +191,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
}
static struct inode *bch_vfs_inode_create(struct cache_set *c,
- struct inode *parent,
+ struct inode *dir,
umode_t mode, dev_t rdev)
{
struct inode *inode;
@@ -162,22 +200,24 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
struct bch_inode *bi;
struct bkey_i_inode bkey_inode;
struct timespec ts = CURRENT_TIME;
+ struct bch_inode_key key;
s64 now = timespec_to_ns(&ts);
int ret;
- inode = new_inode(parent->i_sb);
+ inode = new_inode(dir->i_sb);
if (unlikely(!inode))
return ERR_PTR(-ENOMEM);
- inode_init_owner(inode, parent, mode);
+ inode_init_owner(inode, dir, mode);
- ret = posix_acl_create(parent, &inode->i_mode, &default_acl, &acl);
+ ret = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
if (ret) {
make_bad_inode(inode);
goto err;
}
ei = to_bch_ei(inode);
+ ei->snapshot = to_bch_ei(dir)->snapshot;
bi = &bkey_inode_init(&bkey_inode.k_i)->v;
bi->i_uid = cpu_to_le32(i_uid_read(inode));
@@ -193,7 +233,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
- ret = bch_inode_create(c, &bkey_inode.k_i,
+ ret = bch_inode_create(c, ei->snapshot, &bkey_inode.k_i,
BLOCKDEV_INODE_MAX, 0,
&c->unused_inode_hint);
if (unlikely(ret)) {
@@ -219,6 +259,18 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
goto err;
}
+ key = (struct bch_inode_key) { ei->snapshot, inode->i_ino };
+
+ if (insert_inode_locked4(inode,
+ bch_vfs_inode_hash(&key),
+ bch_vfs_inode_test,
+ &key)) {
+ cache_set_inconsistent(c,
+ "error creating new inode: inode number still in use");
+ ret = -EIO;
+ goto err;
+ }
+
insert_inode_hash(inode);
atomic_long_inc(&c->nr_inodes);
out:
@@ -283,13 +335,14 @@ static int __bch_create(struct inode *dir, struct dentry *dentry,
static struct dentry *bch_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
struct inode *inode = NULL;
u64 inum;
inum = bch_dirent_lookup(dir, &dentry->d_name);
if (inum)
- inode = bch_vfs_inode_get(dir->i_sb, inum);
+ inode = bch_vfs_inode_get(dir->i_sb, dir_ei->snapshot, inum);
return d_splice_alias(inode, dentry);
}
@@ -407,12 +460,13 @@ static int bch_rmdir(struct inode *dir, struct dentry *dentry)
{
struct cache_set *c = dir->i_sb->s_fs_info;
struct inode *inode = dentry->d_inode;
+ struct bch_inode_info *ei = to_bch_ei(inode);
int ret;
lockdep_assert_held(&inode->i_rwsem);
lockdep_assert_held(&dir->i_rwsem);
- if (bch_empty_dir(c, inode->i_ino))
+ if (bch_empty_dir(c, ei->snapshot, inode->i_ino))
return -ENOTEMPTY;
ret = bch_unlink(dir, dentry);
@@ -450,7 +504,8 @@ static int bch_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!S_ISDIR(new_inode->i_mode))
return -ENOTDIR;
- if (bch_empty_dir(c, new_inode->i_ino))
+ if (bch_empty_dir(c, to_bch_ei(new_inode)->snapshot,
+ new_inode->i_ino))
return -ENOTEMPTY;
ret = bch_dirent_rename(c,
@@ -653,6 +708,7 @@ static int bch_fill_extent(struct fiemap_extent_info *info,
static int bch_fiemap(struct inode *inode, struct fiemap_extent_info *info,
u64 start, u64 len)
{
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = inode->i_sb->s_fs_info;
struct btree_iter iter;
struct bkey_s_c k;
@@ -663,8 +719,8 @@ static int bch_fiemap(struct inode *inode, struct fiemap_extent_info *info,
if (start + len < start)
return -EINVAL;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(inode->i_ino, start >> 9), k)
+ for_each_snapshot_key(&iter, c, ei->snapshot, BTREE_ID_EXTENTS,
+ POS(inode->i_ino, start >> 9), k)
if (bkey_extent_is_data(k.k) ||
k.k->type == BCH_RESERVATION) {
if (bkey_cmp(bkey_start_pos(k.k),
@@ -1087,12 +1143,11 @@ static int bch_vfs_write_inode(struct inode *inode,
static void bch_evict_inode(struct inode *inode)
{
struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(inode);
truncate_inode_pages_final(&inode->i_data);
if (!bch_journal_error(&c->journal) && !is_bad_inode(inode)) {
- struct bch_inode_info *ei = to_bch_ei(inode);
-
/* XXX - we want to check this stuff iff there weren't IO errors: */
BUG_ON(atomic_long_read(&ei->i_sectors_dirty_count));
BUG_ON(atomic64_read(&ei->i_sectors) != inode->i_blocks);
@@ -1101,7 +1156,7 @@ static void bch_evict_inode(struct inode *inode)
clear_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
- bch_inode_rm(c, inode->i_ino);
+ bch_inode_rm(c, ei->snapshot, inode->i_ino);
atomic_long_dec(&c->nr_inodes);
}
}
@@ -1296,6 +1351,71 @@ static const struct super_operations bch_super_operations = {
#endif
};
+static void bch_root_dentry_iput(struct dentry *dentry, struct inode *inode)
+{
+ struct cache_set *c = inode->i_sb->s_fs_info;
+ struct bch_inode_info *ei = to_bch_ei(inode);
+
+ bch_snapshot_close(c, ei->snapshot);
+ iput(inode);
+}
+
+const struct dentry_operations bch_root_dentry_operations = {
+ .d_iput = bch_root_dentry_iput,
+};
+
+static int bch_open_root(struct super_block *sb, struct snapshot *snapshot)
+{
+ if (!snapshot->root) {
+ struct inode *inode;
+ struct dentry *root;
+
+ inode = bch_vfs_inode_get(sb, snapshot, BCACHE_ROOT_INO);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ root = d_make_root(inode);
+ if (!root)
+ return -ENOMEM;
+
+ root->d_op = &bch_root_dentry_operations;
+ snapshot->root = root;
+ }
+
+ return 0;
+}
+
+static bool match_qstr_prefix(const char *prefix, struct qstr str,
+ struct qstr *rest)
+{
+ size_t len = strlen(prefix);
+
+ if (len > str.len || memcmp(prefix, str.name, len))
+ return false;
+
+ *rest = str;
+ rest->name += len;
+ rest->len -= len;
+ return true;
+}
+
+static int bch_parse_fs_options(const char *options, struct qstr *snapshot_name)
+{
+ if (!options)
+ return 0;
+
+ while (*options) {
+ char *n = strchrnul(options, ',');
+ struct qstr opt = QSTR_INIT(options, n - options);
+
+ options = *n ? n + 1 : n;
+
+ match_qstr_prefix("snapshot=", opt, snapshot_name);
+ }
+
+ return 0;
+}
+
static int bch_test_super(struct super_block *s, void *data)
{
return s->s_fs_info == data;
@@ -1311,13 +1431,16 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
struct cache_set *c;
- struct cache *ca;
struct super_block *sb;
- struct inode *inode;
struct cache_set_opts opts;
- unsigned i;
+ struct qstr snapshot_name = QSTR_INIT("", 0);
+ struct snapshot *snapshot;
int ret;
+ ret = bch_parse_fs_options(data, &snapshot_name);
+ if (ret)
+ return ERR_PTR(ret);
+
ret = bch_parse_options(&opts, flags, data);
if (ret)
return ERR_PTR(ret);
@@ -1332,63 +1455,74 @@ static struct dentry *bch_mount(struct file_system_type *fs_type,
return ERR_CAST(sb);
}
- BUG_ON(sb->s_fs_info != c);
+ if (!sb->s_root) {
+ struct cache *ca;
+ unsigned i;
+
+ /* XXX: blocksize */
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_op = &bch_super_operations;
+ sb->s_xattr = bch_xattr_handlers;
+ sb->s_magic = BCACHE_STATFS_MAGIC;
+ sb->s_time_gran = 1;
+ c->vfs_sb = sb;
+ sb->s_bdi = &c->bdi;
+
+ rcu_read_lock();
+ for_each_cache_rcu(ca, c, i) {
+ struct block_device *bdev = ca->disk_sb.bdev;
+
+ BUILD_BUG_ON(sizeof(sb->s_id) < BDEVNAME_SIZE);
+
+ bdevname(bdev, sb->s_id);
+
+ /* XXX: do we even need s_bdev? */
+ sb->s_bdev = bdev;
+ sb->s_dev = bdev->bd_dev;
+ break;
+ }
+ rcu_read_unlock();
- if (sb->s_root) {
+ if (opts.posix_acl < 0)
+ sb->s_flags |= MS_POSIXACL;
+ else
+ sb->s_flags |= opts.posix_acl ? MS_POSIXACL : 0;
+ } else {
closure_put(&c->cl);
if ((flags ^ sb->s_flags) & MS_RDONLY) {
ret = -EBUSY;
goto err_put_super;
}
- goto out;
}
- /* XXX: blocksize */
- sb->s_blocksize = PAGE_SIZE;
- sb->s_blocksize_bits = PAGE_SHIFT;
- sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_op = &bch_super_operations;
- sb->s_xattr = bch_xattr_handlers;
- sb->s_magic = BCACHE_STATFS_MAGIC;
- sb->s_time_gran = 1;
- c->vfs_sb = sb;
- sb->s_bdi = &c->bdi;
-
- rcu_read_lock();
- for_each_cache_rcu(ca, c, i) {
- struct block_device *bdev = ca->disk_sb.bdev;
-
- BUILD_BUG_ON(sizeof(sb->s_id) < BDEVNAME_SIZE);
-
- bdevname(bdev, sb->s_id);
-
- /* XXX: do we even need s_bdev? */
- sb->s_bdev = bdev;
- sb->s_dev = bdev->bd_dev;
- break;
+ snapshot = bch_snapshot_open(c, snapshot_name);
+ if (IS_ERR(snapshot)) {
+ bch_err(c, "mount error: error opening snapshot");
+ ret = PTR_ERR(snapshot);
+ goto err_put_super;
}
- rcu_read_unlock();
-
- if (opts.posix_acl < 0)
- sb->s_flags |= MS_POSIXACL;
- else
- sb->s_flags |= opts.posix_acl ? MS_POSIXACL : 0;
- inode = bch_vfs_inode_get(sb, BCACHE_ROOT_INO);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
+ ret = bch_open_root(sb, snapshot);
+ if (ret) {
+ bch_err(c, "mount error: error opening root directory");
goto err_put_super;
}
- sb->s_root = d_make_root(inode);
if (!sb->s_root) {
- ret = -ENOMEM;
- goto err_put_super;
+ ret = bch_open_root(sb, c->snapshot_root);
+ if (ret) {
+ bch_err(c, "mount error: error opening root directory");
+ goto err_put_super;
+ }
+
+ sb->s_root = c->snapshot_root->root;
}
sb->s_flags |= MS_ACTIVE;
-out:
+
return dget(sb->s_root);
err_put_super:
diff --git a/drivers/md/bcache/fs.h b/drivers/md/bcache/fs.h
index e8f627c6ba45..cd37c1a4c635 100644
--- a/drivers/md/bcache/fs.h
+++ b/drivers/md/bcache/fs.h
@@ -5,9 +5,13 @@
#include <linux/seqlock.h>
+struct snapshot;
+
struct bch_inode_info {
struct inode vfs_inode;
+ struct snapshot *snapshot;
+
struct mutex update_lock;
u64 journal_seq;
diff --git a/drivers/md/bcache/inode.c b/drivers/md/bcache/inode.c
index 58821fd75463..4710b8802fad 100644
--- a/drivers/md/bcache/inode.c
+++ b/drivers/md/bcache/inode.c
@@ -108,8 +108,8 @@ const struct bkey_ops bch_bkey_inode_ops = {
.val_to_text = bch_inode_to_text,
};
-int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
- u64 min, u64 max, u64 *hint)
+int bch_inode_create(struct cache_set *c, struct snapshot *snapshot,
+ struct bkey_i *inode, u64 min, u64 max, u64 *hint)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -130,6 +130,7 @@ int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
again:
bch_btree_iter_init_intent(&iter, c, BTREE_ID_INODES, POS(*hint, 0));
+ /* We're picking an inode number that's unused in any snapshot: */
while ((k = bch_btree_iter_peek_with_holes(&iter)).k) {
if (k.k->p.inode >= max)
break;
@@ -140,8 +141,9 @@ again:
pr_debug("inserting inode %llu (size %u)",
inode->k.p.inode, inode->k.u64s);
- ret = bch_btree_insert_at(&iter, inode, NULL, NULL,
- NULL, BTREE_INSERT_ATOMIC);
+ ret = bch_btree_insert_at(snapshot, &iter, inode,
+ NULL, NULL, NULL,
+ BTREE_INSERT_ATOMIC);
if (ret == -EINTR)
continue;
@@ -168,23 +170,26 @@ again:
return -ENOSPC;
}
-int bch_inode_truncate(struct cache_set *c, u64 inode_nr, u64 new_size,
+int bch_inode_truncate(struct cache_set *c, struct snapshot *snapshot,
+ u64 inode_nr, u64 new_size,
struct extent_insert_hook *hook, u64 *journal_seq)
{
- return bch_discard(c, POS(inode_nr, new_size), POS(inode_nr + 1, 0),
+ return bch_discard(c, snapshot,
+ POS(inode_nr, new_size),
+ POS(inode_nr + 1, 0),
0, NULL, hook, journal_seq);
}
-int bch_inode_rm(struct cache_set *c, u64 inode_nr)
+int bch_inode_rm(struct cache_set *c, struct snapshot *snapshot, u64 inode_nr)
{
struct bkey_i delete;
int ret;
- ret = bch_inode_truncate(c, inode_nr, 0, NULL, NULL);
+ ret = bch_inode_truncate(c, snapshot, inode_nr, 0, NULL, NULL);
if (ret < 0)
return ret;
- ret = bch_btree_delete_range(c, BTREE_ID_XATTRS,
+ ret = bch_btree_delete_range(c, snapshot, BTREE_ID_XATTRS,
POS(inode_nr, 0),
POS(inode_nr + 1, 0),
0, NULL, NULL, NULL);
@@ -199,7 +204,7 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr)
* XXX: the dirent code would ideally delete whiteouts when they're no
* longer needed
*/
- ret = bch_btree_delete_range(c, BTREE_ID_DIRENTS,
+ ret = bch_btree_delete_range(c, snapshot, BTREE_ID_DIRENTS,
POS(inode_nr, 0),
POS(inode_nr + 1, 0),
0, NULL, NULL, NULL);
@@ -209,18 +214,19 @@ int bch_inode_rm(struct cache_set *c, u64 inode_nr)
bkey_init(&delete.k);
delete.k.p.inode = inode_nr;
- return bch_btree_insert(c, BTREE_ID_INODES, &delete, NULL,
- NULL, NULL, BTREE_INSERT_NOFAIL);
+ return bch_btree_insert(c, snapshot, BTREE_ID_INODES, &delete,
+ NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
}
-int bch_inode_update(struct cache_set *c, struct bkey_i *inode,
- u64 *journal_seq)
+int bch_inode_update(struct cache_set *c, struct snapshot *snapshot,
+ struct bkey_i *inode, u64 *journal_seq)
{
- return bch_btree_update(c, BTREE_ID_INODES, inode, journal_seq);
+ return bch_btree_update(c, snapshot, BTREE_ID_INODES,
+ inode, journal_seq);
}
-int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
- struct bkey_i_inode *inode)
+int bch_inode_find_by_inum(struct cache_set *c, struct snapshot *snapshot,
+ u64 inode_nr, struct bkey_i_inode *inode)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -246,13 +252,14 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
return ret;
}
-int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid,
- struct bkey_i_inode_blockdev *ret)
+int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, struct snapshot *snapshot,
+ uuid_le *uuid, struct bkey_i_inode_blockdev *ret)
{
struct btree_iter iter;
struct bkey_s_c k;
- for_each_btree_key(&iter, c, BTREE_ID_INODES, POS(0, 0), k) {
+ for_each_snapshot_key(&iter, c, snapshot, BTREE_ID_INODES,
+ POS(0, 0), k) {
if (k.k->p.inode >= BLOCKDEV_INODE_MAX)
break;
diff --git a/drivers/md/bcache/inode.h b/drivers/md/bcache/inode.h
index a89064a37861..7373762521a5 100644
--- a/drivers/md/bcache/inode.h
+++ b/drivers/md/bcache/inode.h
@@ -6,14 +6,17 @@ extern const struct bkey_ops bch_bkey_inode_ops;
ssize_t bch_inode_status(char *, size_t, const struct bkey *);
-int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *);
-int bch_inode_truncate(struct cache_set *, u64, u64,
+int bch_inode_create(struct cache_set *, struct snapshot *,
+ struct bkey_i *, u64, u64, u64 *);
+int bch_inode_truncate(struct cache_set *, struct snapshot *, u64, u64,
struct extent_insert_hook *, u64 *);
-int bch_inode_rm(struct cache_set *, u64);
-int bch_inode_update(struct cache_set *, struct bkey_i *, u64 *);
+int bch_inode_rm(struct cache_set *, struct snapshot *, u64);
+int bch_inode_update(struct cache_set *, struct snapshot *,
+ struct bkey_i *, u64 *);
-int bch_inode_find_by_inum(struct cache_set *, u64, struct bkey_i_inode *);
-int bch_cached_dev_inode_find_by_uuid(struct cache_set *, uuid_le *,
- struct bkey_i_inode_blockdev *);
+int bch_inode_find_by_inum(struct cache_set *, struct snapshot *,
+ u64, struct bkey_i_inode *);
+int bch_cached_dev_inode_find_by_uuid(struct cache_set *, struct snapshot *,
+ uuid_le *, struct bkey_i_inode_blockdev *);
#endif
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a7e0bc931434..7114ecd6d8c9 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -711,9 +711,8 @@ static int bch_write_index_default(struct bch_write_op *op)
bch_btree_iter_init_intent(&iter, op->c, BTREE_ID_EXTENTS,
bkey_start_pos(&bch_keylist_front(keys)->k));
- ret = bch_btree_insert_list_at(&iter, keys, &op->res,
- op->insert_hook,
- op_journal_seq(op),
+ ret = bch_btree_insert_list_at(op->snapshot, &iter, keys, &op->res,
+ op->insert_hook, op_journal_seq(op),
BTREE_INSERT_NOFAIL);
bch_btree_iter_unlock(&iter);
@@ -776,7 +775,7 @@ static void bch_write_discard(struct closure *cl)
struct bio *bio = &op->bio->bio.bio;
u64 inode = op->insert_key.k.p.inode;
- op->error = bch_discard(op->c,
+ op->error = bch_discard(op->c, op->snapshot,
POS(inode, bio->bi_iter.bi_sector),
POS(inode, bio_end_sector(bio)),
op->insert_key.k.version,
@@ -1421,8 +1420,9 @@ void bch_write(struct closure *cl)
}
void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
- struct bch_write_bio *bio, struct disk_reservation res,
- struct write_point *wp, struct bkey_s_c insert_key,
+ struct snapshot *snapshot, struct bch_write_bio *bio,
+ struct disk_reservation res, struct write_point *wp,
+ struct bkey_s_c insert_key,
struct extent_insert_hook *insert_hook,
u64 *journal_seq, unsigned flags)
{
@@ -1435,6 +1435,7 @@ void bch_write_op_init(struct bch_write_op *op, struct cache_set *c,
}
op->c = c;
+ op->snapshot = snapshot;
op->io_wq = NULL;
op->bio = bio;
op->written = 0;
@@ -1497,13 +1498,14 @@ void bch_replace_init(struct bch_replace_info *r, struct bkey_s_c old)
* XXX: this needs to be refactored with inode_truncate, or more
* appropriately inode_truncate should call this
*/
-int bch_discard(struct cache_set *c, struct bpos start,
- struct bpos end, u64 version,
+int bch_discard(struct cache_set *c, struct snapshot *snapshot,
+ struct bpos start, struct bpos end, u64 version,
struct disk_reservation *disk_res,
struct extent_insert_hook *hook,
u64 *journal_seq)
{
- return bch_btree_delete_range(c, BTREE_ID_EXTENTS, start, end, version,
+ return bch_btree_delete_range(c, snapshot, BTREE_ID_EXTENTS,
+ start, end, version,
disk_res, hook, journal_seq);
}
@@ -1785,7 +1787,8 @@ static void bch_read_endio(struct bio *bio)
}
}
-void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
+void bch_read_extent_iter(struct cache_set *c, struct snapshot *snapshot,
+ struct bch_read_bio *orig,
struct bvec_iter iter, struct bkey_s_c k,
struct extent_pick_ptr *pick, unsigned flags)
{
@@ -1876,6 +1879,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
rbio->orig_bi_end_io = orig->bio.bi_end_io;
rbio->parent_iter = iter;
+ rbio->snapshot = snapshot;
rbio->inode = k.k->p.inode;
rbio->flags = flags;
rbio->bounce = bounce;
@@ -1905,7 +1909,7 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
bch_replace_init(&promote_op->replace, k);
- bch_write_op_init(&promote_op->iop, c,
+ bch_write_op_init(&promote_op->iop, c, SNAPSHOT_NONE,
&promote_op->bio,
(struct disk_reservation) { 0 },
&c->promote_write_point, k,
@@ -1953,16 +1957,16 @@ void bch_read_extent_iter(struct cache_set *c, struct bch_read_bio *orig,
#endif
}
-static void bch_read_iter(struct cache_set *c, struct bch_read_bio *rbio,
- struct bvec_iter bvec_iter, u64 inode,
- unsigned flags)
+static void bch_read_iter(struct cache_set *c, struct snapshot *snapshot,
+ struct bch_read_bio *rbio, struct bvec_iter bvec_iter,
+ u64 inode, unsigned flags)
{
struct bio *bio = &rbio->bio;
struct btree_iter iter;
struct bkey_s_c k;
- for_each_btree_key_with_holes(&iter, c, BTREE_ID_EXTENTS,
- POS(inode, bvec_iter.bi_sector), k) {
+ for_each_snapshot_key_with_holes(&iter, c, snapshot, BTREE_ID_EXTENTS,
+ POS(inode, bvec_iter.bi_sector), k) {
BKEY_PADDED(k) tmp;
struct extent_pick_ptr pick;
unsigned bytes, sectors;
@@ -1997,7 +2001,7 @@ static void bch_read_iter(struct cache_set *c, struct bch_read_bio *rbio,
PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
c->prio_clock[READ].hand;
- bch_read_extent_iter(c, rbio, bvec_iter,
+ bch_read_extent_iter(c, snapshot, rbio, bvec_iter,
k, &pick, flags);
flags &= ~BCH_READ_MAY_REUSE_BIO;
@@ -2024,11 +2028,12 @@ static void bch_read_iter(struct cache_set *c, struct bch_read_bio *rbio,
bio_endio(bio);
}
-void bch_read(struct cache_set *c, struct bch_read_bio *bio, u64 inode)
+void bch_read(struct cache_set *c, struct snapshot *snapshot,
+ struct bch_read_bio *bio, u64 inode)
{
bch_increment_clock(c, bio_sectors(&bio->bio), READ);
- bch_read_iter(c, bio, bio->bio.bi_iter, inode,
+ bch_read_iter(c, snapshot, bio, bio->bio.bi_iter, inode,
BCH_READ_FORCE_BOUNCE|
BCH_READ_RETRY_IF_STALE|
BCH_READ_PROMOTE|
@@ -2043,6 +2048,7 @@ static void bch_read_retry(struct cache_set *c, struct bch_read_bio *rbio)
{
struct bch_read_bio *parent = bch_rbio_parent(rbio);
struct bvec_iter iter = rbio->parent_iter;
+ struct snapshot *snapshot = rbio->snapshot;
u64 inode = rbio->inode;
trace_bcache_read_retry(&rbio->bio);
@@ -2052,7 +2058,7 @@ static void bch_read_retry(struct cache_set *c, struct bch_read_bio *rbio)
else
rbio->bio.bi_end_io = rbio->orig_bi_end_io;
- bch_read_iter(c, parent, iter, inode,
+ bch_read_iter(c, snapshot, parent, iter, inode,
BCH_READ_FORCE_BOUNCE|
BCH_READ_RETRY_IF_STALE|
BCH_READ_PROMOTE);
diff --git a/drivers/md/bcache/io.h b/drivers/md/bcache/io.h
index b244161d322a..671b2420d9eb 100644
--- a/drivers/md/bcache/io.h
+++ b/drivers/md/bcache/io.h
@@ -6,6 +6,8 @@
#include <linux/lz4.h>
#include <linux/zlib.h>
+struct snapshot;
+
#define COMPRESSION_WORKSPACE_SIZE \
max_t(size_t, zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),\
max_t(size_t, zlib_inflate_workspacesize(), \
@@ -37,7 +39,7 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
}
void bch_write_op_init(struct bch_write_op *, struct cache_set *,
- struct bch_write_bio *,
+ struct snapshot *, struct bch_write_bio *,
struct disk_reservation, struct write_point *,
struct bkey_s_c,
struct extent_insert_hook *, u64 *, unsigned);
@@ -49,17 +51,19 @@ struct cache_promote_op;
struct extent_pick_ptr;
-void bch_read_extent_iter(struct cache_set *, struct bch_read_bio *,
- struct bvec_iter, struct bkey_s_c k,
- struct extent_pick_ptr *, unsigned);
+void bch_read_extent_iter(struct cache_set *, struct snapshot *,
+ struct bch_read_bio *, struct bvec_iter,
+ struct bkey_s_c k, struct extent_pick_ptr *,
+ unsigned);
static inline void bch_read_extent(struct cache_set *c,
+ struct snapshot *snapshot,
struct bch_read_bio *orig,
struct bkey_s_c k,
struct extent_pick_ptr *pick,
unsigned flags)
{
- bch_read_extent_iter(c, orig, orig->bio.bi_iter,
+ bch_read_extent_iter(c, snapshot, orig, orig->bio.bi_iter,
k, pick, flags);
}
@@ -71,7 +75,7 @@ enum bch_read_flags {
BCH_READ_MAY_REUSE_BIO = 1 << 4,
};
-void bch_read(struct cache_set *, struct bch_read_bio *, u64);
+void bch_read(struct cache_set *, struct snapshot *, struct bch_read_bio *, u64);
void bch_bbio_endio(struct bbio *);
@@ -82,8 +86,8 @@ void bch_submit_bbio(struct bbio *, struct cache *,
void bch_submit_bbio_replicas(struct bch_write_bio *, struct cache_set *,
const struct bkey_i *, unsigned, bool);
-int bch_discard(struct cache_set *, struct bpos, struct bpos,
- u64, struct disk_reservation *,
+int bch_discard(struct cache_set *, struct snapshot *, struct bpos,
+ struct bpos, u64, struct disk_reservation *,
struct extent_insert_hook *, u64 *);
void __cache_promote(struct cache_set *, struct bbio *,
diff --git a/drivers/md/bcache/io_types.h b/drivers/md/bcache/io_types.h
index 50856c6ce207..b0e21ca80559 100644
--- a/drivers/md/bcache/io_types.h
+++ b/drivers/md/bcache/io_types.h
@@ -8,6 +8,8 @@
#include <linux/llist.h>
#include <linux/workqueue.h>
+struct snapshot;
+
/* XXX kill kill kill */
struct bbio {
struct cache *ca;
@@ -44,6 +46,7 @@ struct bch_read_bio {
*
* But we need to stash the inode somewhere:
*/
+ struct snapshot *snapshot;
u64 inode;
unsigned submit_time_us;
@@ -88,6 +91,7 @@ struct bch_replace_info {
struct bch_write_op {
struct closure cl;
struct cache_set *c;
+ struct snapshot *snapshot;
struct workqueue_struct *io_wq;
struct bch_write_bio *bio;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index f484b5d49c59..69c04b6c723b 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -1262,7 +1262,8 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list)
trace_bcache_journal_replay_key(&k->k);
- ret = bch_btree_insert(c, entry->btree_id, k,
+ ret = bch_btree_insert(c, SNAPSHOT_NONE,
+ entry->btree_id, k,
&disk_res, NULL, NULL,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_NO_MARK_KEY);
diff --git a/drivers/md/bcache/migrate.c b/drivers/md/bcache/migrate.c
index 236e0e469b4f..66da3a5d652a 100644
--- a/drivers/md/bcache/migrate.c
+++ b/drivers/md/bcache/migrate.c
@@ -54,10 +54,9 @@ static int issue_migration_move(struct cache *ca,
bch_replace_init(&io->replace, k);
- bch_write_op_init(&io->op, c, &io->wbio, res,
- &c->migration_write_point,
- k, &io->replace.hook, NULL,
- 0);
+ bch_write_op_init(&io->op, c, SNAPSHOT_NONE, &io->wbio, res,
+ &c->migration_write_point, k,
+ &io->replace.hook, NULL, 0);
io->op.nr_replicas = 1;
io->op.io_wq = q->wq;
@@ -384,8 +383,9 @@ static int bch_flag_key_bad(struct btree_iter *iter,
*/
bch_extent_normalize(c, e.s);
- return bch_btree_insert_at(iter, &tmp.key, NULL, NULL,
- NULL, BTREE_INSERT_ATOMIC);
+ return bch_btree_insert_at(SNAPSHOT_NONE, iter, &tmp.key,
+ NULL, NULL, NULL,
+ BTREE_INSERT_ATOMIC);
}
/*
diff --git a/drivers/md/bcache/move.c b/drivers/md/bcache/move.c
index bbfcbdae2f37..922b2d2e6e3a 100644
--- a/drivers/md/bcache/move.c
+++ b/drivers/md/bcache/move.c
@@ -467,7 +467,7 @@ static void __bch_data_move(struct closure *cl)
io->rbio.bio.bi_iter.bi_sector = bkey_start_offset(&io->key.k);
io->rbio.bio.bi_end_io = read_moving_endio;
- bch_read_extent(io->op.c, &io->rbio,
+ bch_read_extent(io->op.c, SNAPSHOT_NONE, &io->rbio,
bkey_i_to_s_c(&io->key),
&pick, BCH_READ_IS_LAST);
}
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 2f5f59ba6177..232d97021acd 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -60,7 +60,7 @@ static int issue_moving_gc_move(struct moving_queue *q,
bch_replace_init(&io->replace, bkey_i_to_s_c(k));
- bch_write_op_init(&io->op, c, &io->wbio,
+ bch_write_op_init(&io->op, c, SNAPSHOT_NONE, &io->wbio,
(struct disk_reservation) { 0 },
NULL, bkey_i_to_s_c(k),
&io->replace.hook, NULL,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 67c8d3f3d145..25abdff41c75 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -358,7 +358,8 @@ static int cached_dev_cache_miss(struct btree_iter *iter, struct search *s,
bio->bi_iter.bi_sector + sectors,
sectors);
- ret = bch_btree_insert_check_key(iter, &replace.key);
+ ret = bch_btree_insert_check_key(iter->c->snapshot_root,
+ iter, &replace.key);
if (ret == -EINTR)
return ret;
@@ -395,12 +396,14 @@ static void cached_dev_read(struct cached_dev *dc, struct search *s)
{
struct closure *cl = &s->cl;
struct bio *bio = &s->rbio.bio;
+ struct cache_set *c = s->iop.c;
struct btree_iter iter;
struct bkey_s_c k;
- bch_increment_clock(s->iop.c, bio_sectors(bio), READ);
+ bch_increment_clock(c, bio_sectors(bio), READ);
- for_each_btree_key_with_holes(&iter, s->iop.c, BTREE_ID_EXTENTS,
+ for_each_snapshot_key_with_holes(&iter, c, c->snapshot_root,
+ BTREE_ID_EXTENTS,
POS(s->inode, bio->bi_iter.bi_sector), k) {
BKEY_PADDED(k) tmp;
struct extent_pick_ptr pick;
@@ -411,9 +414,9 @@ retry:
bch_btree_iter_unlock(&iter);
k = bkey_i_to_s_c(&tmp.k);
- bch_extent_pick_ptr(s->iop.c, k, &pick);
+ bch_extent_pick_ptr(c, k, &pick);
if (IS_ERR(pick.ca)) {
- bcache_io_error(s->iop.c, bio,
+ bcache_io_error(c, bio,
"no device to read from");
goto out;
}
@@ -426,12 +429,13 @@ retry:
if (pick.ca) {
PTR_BUCKET(pick.ca, &pick.ptr)->read_prio =
- s->iop.c->prio_clock[READ].hand;
+ c->prio_clock[READ].hand;
if (!bkey_extent_is_cached(k.k))
s->read_dirty_data = true;
- bch_read_extent(s->iop.c, &s->rbio, k, &pick,
+ bch_read_extent(c, c->snapshot_root,
+ &s->rbio, k, &pick,
BCH_READ_FORCE_BOUNCE|
BCH_READ_RETRY_IF_STALE|
(!s->bypass ? BCH_READ_PROMOTE : 0)|
@@ -439,7 +443,8 @@ retry:
} else {
/* not present (hole), or stale cached data */
if (cached_dev_cache_miss(&iter, s, bio, sectors)) {
- k = bch_btree_iter_peek_with_holes(&iter);
+ k = bch_btree_iter_peek_snapshot_with_holes(&iter,
+ c->snapshot_root);
goto retry;
}
}
@@ -458,7 +463,7 @@ retry:
* reading a btree node
*/
BUG_ON(!bch_btree_iter_unlock(&iter));
- bcache_io_error(s->iop.c, bio, "btree IO error");
+ bcache_io_error(c, bio, "btree IO error");
out:
continue_at(cl, cached_dev_read_done_bh, NULL);
}
@@ -554,10 +559,9 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
if (bypass)
flags |= BCH_WRITE_DISCARD;
- bch_write_op_init(&s->iop, dc->disk.c, &s->wbio,
- (struct disk_reservation) { 0 }, NULL,
- bkey_to_s_c(&insert_key),
- NULL, NULL, flags);
+ bch_write_op_init(&s->iop, dc->disk.c, dc->disk.c->snapshot_root,
+ &s->wbio, (struct disk_reservation) { 0 }, NULL,
+ bkey_to_s_c(&insert_key), NULL, NULL, flags);
closure_call(&s->iop.cl, bch_write, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
@@ -699,7 +703,8 @@ static void __blockdev_volume_make_request(struct request_queue *q,
if (bio->bi_rw & REQ_DISCARD)
flags |= BCH_WRITE_DISCARD;
- bch_write_op_init(&s->iop, d->c, &s->wbio, res, NULL,
+ bch_write_op_init(&s->iop, d->c, d->c->snapshot_root,
+ &s->wbio, res, NULL,
bkey_to_s_c(&KEY(s->inode,
bio_end_sector(&s->wbio.bio.bio),
bio_sectors(&s->wbio.bio.bio))),
@@ -708,7 +713,8 @@ static void __blockdev_volume_make_request(struct request_queue *q,
closure_call(&s->iop.cl, bch_write, NULL, &s->cl);
} else {
closure_get(&s->cl);
- bch_read(d->c, &s->rbio, bcache_dev_inum(d));
+ bch_read(d->c, d->c->snapshot_root,
+ &s->rbio, bcache_dev_inum(d));
}
continue_at(&s->cl, search_free, NULL);
}
diff --git a/drivers/md/bcache/snapshot.c b/drivers/md/bcache/snapshot.c
new file mode 100644
index 000000000000..488434bb5324
--- /dev/null
+++ b/drivers/md/bcache/snapshot.c
@@ -0,0 +1,617 @@
+#include "bcache.h"
+#include "btree_update.h"
+#include "fs-gc.h"
+#include "snapshot.h"
+#include "str_hash.h"
+
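+/*
+ * Each snapshot node owns a half open interval [start, end) in a 64 bit
+ * range space, nested within its parent's interval; ancestry checks are
+ * then simple interval containment (see bch_snapshot_is_descendant()).
+ * Nodes are cached in c->snapshot_cache so those checks can avoid the
+ * btree:
+ */
+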
+struct snapshot_cached_entry *__bch_snapshot_entry(struct cache_set *c, u32 id)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_snapshot_node snapshot;
+ struct snapshot_cached_entry *entry;
+
+retry:
+ bch_btree_iter_init(&iter, c, BTREE_ID_SNAPSHOT_TREE, POS(0, id));
+ k = bch_btree_iter_peek_with_holes(&iter);
+
+ /* XXX */
+ BUG_ON(!k.k || k.k->type != BCH_SNAPSHOT_NODE);
+ snapshot = bkey_s_c_to_snapshot_node(k);
+
+ entry = kmalloc(sizeof(*entry), GFP_NOFS);
+ BUG_ON(!entry);
+
+ entry->id = id;
+ entry->parent = le64_to_cpu(snapshot.v->parent);
+ entry->start = le64_to_cpu(snapshot.v->start);
+ entry->end = le64_to_cpu(snapshot.v->end);
+ bch_btree_iter_unlock(&iter);
+
+ if (!rhashtable_lookup_insert_fast(&c->snapshot_cache,
+ &entry->hash,
+ bch_snapshot_cache_params))
+ return entry;
+
+ kfree(entry);
+
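+	/* Lost the insert race: use the entry another thread cached: */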
+ entry = rhashtable_lookup_fast(&c->snapshot_cache, &id,
+ bch_snapshot_cache_params);
+ if (entry)
+ return entry;
+ goto retry;
+}
+
+/* Snapshot names are indexed by hash (like dirents and xattrs): */
+
+static unsigned snapshot_name_bytes(struct bkey_s_c_snapshot_name s)
+{
+ unsigned len = bkey_val_bytes(s.k) - sizeof(struct bch_snapshot_name);
+
+ while (len && !s.v->name[len - 1])
+ --len;
+
+ return len;
+}
+
+static u64 bch_snapshot_hash(const struct bch_hash_info *info,
+ const struct qstr *name)
+{
+ struct bch_str_hash_ctx ctx;
+
+ bch_str_hash_init(&ctx, info->type);
+ bch_str_hash_update(&ctx, info->type, &info->seed, sizeof(info->seed));
+ bch_str_hash_update(&ctx, info->type, name->name, name->len);
+
+ return bch_str_hash_end(&ctx, info->type);
+}
+
+static u64 snapshot_hash_key(const struct bch_hash_info *info, const void *key)
+{
+ return bch_snapshot_hash(info, key);
+}
+
+static u64 snapshot_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot_name s = bkey_s_c_to_snapshot_name(k);
+ struct qstr name = {
+ .name = s.v->name,
+ .len = snapshot_name_bytes(s),
+ };
+
+ return bch_snapshot_hash(info, &name);
+}
+
+static bool snapshot_cmp_key(struct bkey_s_c _l, const void *_r)
+{
+ struct bkey_s_c_snapshot_name l = bkey_s_c_to_snapshot_name(_l);
+ int len = snapshot_name_bytes(l);
+ const struct qstr *r = _r;
+
+ return len - r->len ?: memcmp(l.v->name, r->name, len);
+}
+
+static bool snapshot_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
+{
+ struct bkey_s_c_snapshot_name l = bkey_s_c_to_snapshot_name(_l);
+ struct bkey_s_c_snapshot_name r = bkey_s_c_to_snapshot_name(_r);
+ int l_len = snapshot_name_bytes(l);
+ int r_len = snapshot_name_bytes(r);
+
+ return l_len - r_len ?: memcmp(l.v->name, r.v->name, l_len);
+}
+
+static const struct bch_hash_desc snapshot_hash_desc = {
+ .btree_id = BTREE_ID_SNAPSHOT_NAMES,
+ .key_type = BCH_SNAPSHOT_NAME,
+ .whiteout_type = BCH_SNAPSHOT_NAME_WHITEOUT,
+ .hash_key = snapshot_hash_key,
+ .hash_bkey = snapshot_hash_bkey,
+ .cmp_key = snapshot_cmp_key,
+ .cmp_bkey = snapshot_cmp_bkey,
+};
+
+/* Bkey/btree ops: */
+
+static const char *bch_snapshot_name_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case BCH_SNAPSHOT_NAME:
+ return bkey_val_bytes(k.k) < sizeof(struct bch_snapshot_name)
+ ? "value too small"
+ : NULL;
+
+ case BCH_SNAPSHOT_NAME_WHITEOUT:
+ return bkey_val_bytes(k.k) != 0
+ ? "value size should be zero"
+ : NULL;
+
+ default:
+ return "invalid type";
+ }
+}
+
+static void bch_snapshot_name_to_text(struct cache_set *c, char *buf,
+ size_t size, struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot_name s;
+
+ switch (k.k->type) {
+ case BCH_SNAPSHOT_NAME:
+ s = bkey_s_c_to_snapshot_name(k);
+
+ if (size) {
+ unsigned n = min_t(unsigned, size,
+ snapshot_name_bytes(s));
+ memcpy(buf, s.v->name, n);
+ buf[size - 1] = '\0';
+ buf += n;
+ size -= n;
+ }
+
+ scnprintf(buf, size, " -> %llu", le64_to_cpu(s.v->id));
+ break;
+ case BCH_SNAPSHOT_NAME_WHITEOUT:
+ scnprintf(buf, size, "whiteout");
+ break;
+ }
+}
+
+const struct btree_keys_ops bch_snapshot_name_ops = {
+};
+
+const struct bkey_ops bch_bkey_snapshot_name_ops = {
+ .key_invalid = bch_snapshot_name_invalid,
+ .val_to_text = bch_snapshot_name_to_text,
+};
+
+static const char *bch_snapshot_node_invalid(const struct cache_set *c,
+ struct bkey_s_c k)
+{
+ switch (k.k->type) {
+ case BCH_SNAPSHOT_NODE:
+ return bkey_val_bytes(k.k) != sizeof(struct bch_snapshot_node)
+			? "incorrect value size"
+ : NULL;
+
+ default:
+ return "invalid type";
+ }
+}
+
+static void bch_snapshot_node_to_text(struct cache_set *c, char *buf,
+ size_t size, struct bkey_s_c k)
+{
+ struct bkey_s_c_snapshot_node s;
+
+ switch (k.k->type) {
+ case BCH_SNAPSHOT_NODE:
+ s = bkey_s_c_to_snapshot_node(k);
+
+ scnprintf(buf, size, " parent %llu start %llu end %llu",
+ le64_to_cpu(s.v->parent),
+ le64_to_cpu(s.v->start),
+ le64_to_cpu(s.v->end));
+ break;
+ }
+}
+
+const struct btree_keys_ops bch_snapshot_tree_ops = {
+};
+
+const struct bkey_ops bch_bkey_snapshot_tree_ops = {
+ .key_invalid = bch_snapshot_node_invalid,
+ .val_to_text = bch_snapshot_node_to_text,
+};
+
+static int bch_snapshot_alloc_ids(struct cache_set *c, u32 new_ids[2], u32 id)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+
+ /*
+ * Allocate node ids for two children of node at @id:
+ *
+ * Snapshot node ids start at the maximum possible value and grow down:
+ * children must always have smaller node ids than their parents.
+ *
+ * First, find the smallest existing node and grab the two ids below
+ * that: then, keep scanning for any unused slots (holes) smaller than
+ * id, from old snapshots that may have been deleted:
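+	 *
+	 * e.g. if the smallest existing node id is 1000, start with 998 and
+	 * 999, then prefer any unused ids found between 1000 and @id: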
+ */
+ bch_btree_iter_init(&iter, c, BTREE_ID_SNAPSHOT_TREE, POS_MIN);
+ k = bch_btree_iter_peek(&iter);
+ bch_btree_iter_unlock(&iter);
+
+ /* Should be at least one snapshot_node already.. */
+ if (!k.k)
+ return -ENOENT;
+
+ BUG_ON(k.k->p.offset > id);
+
+ if (k.k->p.offset <= 3)
+ return -ENOSPC;
+
+ new_ids[0] = k.k->p.offset - 2;
+ new_ids[1] = k.k->p.offset - 1;
+
+ while (1) {
+ bch_btree_iter_advance_pos(&iter);
+ k = bch_btree_iter_peek_with_holes(&iter);
+ if (!k.k || k.k->p.offset >= id)
+ break;
+
+ if (k.k->type != BCH_SNAPSHOT_NODE) {
+ new_ids[0] = new_ids[1];
+ new_ids[1] = k.k->p.offset;
+ }
+ }
+
+ bch_btree_iter_unlock(&iter);
+ return 0;
+}
+
+/* Make room for more children under id: */
+static int bch_snapshot_renumber_one(struct btree_iter *iter,
+ struct bkey_s_c_snapshot_node n,
+ u64 start, u64 end, u64 shift)
+{
+ struct snapshot_cached_entry *entry;
+ struct bkey_i_snapshot_node new_node;
+ u32 id = n.k->p.offset;
+ int ret;
+
+ bkey_reassemble(&new_node.k_i, n.s_c);
+
+ if (start >= le64_to_cpu(new_node.v.start) &&
+ end <= le64_to_cpu(new_node.v.end)) {
+ /*
+ * Ancestor of the node we want to expand, so we're expanding
+ * this node's range too:
+ */
+ le64_add_cpu(&new_node.v.end, shift);
+ } else if (end < le64_to_cpu(new_node.v.end)) {
+ /*
+ * Not an ancestor, but higher in the range space - shifting
+ * entire range up:
+ */
+ le64_add_cpu(&new_node.v.start, shift);
+ le64_add_cpu(&new_node.v.end, shift);
+ } else {
+ return 0;
+ }
+
+ ret = bch_btree_insert_at(SNAPSHOT_NONE, iter, &new_node.k_i,
+ NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
+ if (ret)
+ return ret;
+
+ /*
+ * Fix cached entry:
+ *
+ * XXX we're using snapshot_lock to make this safe, need something
+ * better:
+ */
+ entry = rhashtable_lookup_fast(&iter->c->snapshot_cache, &id,
+ bch_snapshot_cache_params);
+ if (entry) {
+ entry->start = le64_to_cpu(new_node.v.start);
+ entry->end = le64_to_cpu(new_node.v.end);
+ }
+
+ return 0;
+}
+
+static int bch_snapshot_renumber(struct cache_set *c, u32 id)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_snapshot_node n;
+ struct snapshot_cached_entry *entry;
+ u64 start, end, shift;
+ int ret = 0;
+
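+	/* Grow the target range: ancestors expand, higher ranges shift up: */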
+ rcu_read_lock();
+ entry = bch_snapshot_entry(c, id);
+ start = entry->start;
+ end = entry->end;
+ rcu_read_unlock();
+
+ /*
+	 * We can have 32 bits worth of nodes, and we have 64 bits for range
+	 * numbering: we'll never run out of range space if we don't waste
+	 * more than (64 - 32) == 32 bits worth per node, so shifting by
+	 * 2^30 per renumber stays well within budget:
+	 */
+	shift = 1 << 30;
+
+ for_each_btree_key_intent(&iter, c, BTREE_ID_SNAPSHOT_TREE, POS_MIN, k)
+ switch (k.k->type) {
+ case BCH_SNAPSHOT_NODE:
+ n = bkey_s_c_to_snapshot_node(k);
+
+ if (le64_to_cpu(n.v->end) < end)
+ continue;
+
+ ret = bch_snapshot_renumber_one(&iter, n, start, end, shift);
+ if (ret)
+ goto err;
+ }
+err:
+ bch_btree_iter_unlock(&iter);
+ return ret;
+}
+
+/*
+ * - Renumber other nodes if necessary
+ *
+ * - Create two new nodes in BTREE_ID_SNAPSHOT_TREE
+ *
+ * - Change existing entry in BTREE_ID_SNAPSHOT_NAMES to point to one of
+ * the two new nodes
+ *
+ * - Create new entry in BTREE_ID_SNAPSHOT_NAMES to point to other new
+ * node
+ *
+ * - Update snapshot handle with new id
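+ *
+ * The two new nodes and both name entries are inserted in a single
+ * atomic btree transaction below.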
+ */
+int bch_snapshot_create(struct cache_set *c,
+ struct snapshot *snapshot,
+ const char *new_name)
+{
+ struct qstr qname1 = QSTR_INIT(snapshot->name, strlen(snapshot->name));
+ struct qstr qname2 = QSTR_INIT(new_name, strlen(new_name));
+ struct bch_hash_info info = { .type = c->sb.snapshot_str_hash_type };
+ struct btree_iter iter_node1, iter_node2, iter_name1, iter_name2;
+ struct bkey_s_c k;
+ struct bkey_i_snapshot_node node1, node2;
+ struct bkey_i_snapshot_name *name1 = NULL, *name2 = NULL;
+ struct snapshot_cached_entry *parent;
+ unsigned name1_u64s, name2_u64s;
+ u32 node_ids[2];
+ int ret;
+
+ mutex_lock(&c->snapshot_lock);
+
+ ret = bch_snapshot_alloc_ids(c, node_ids, snapshot->id);
+ if (ret)
+ goto err;
+
+ ret = bch_snapshot_renumber(c, snapshot->id);
+ if (ret)
+ goto err;
+
+ /* don't actually want to be passing a snapshot here.. */
+ k = bch_hash_lookup_intent(snapshot_hash_desc, &info,
+ c, c->snapshot_root, 0,
+ &iter_name1, &qname1);
+ if (!k.k) {
+ bch_btree_iter_unlock(&iter_name1);
+ ret = -ENOENT;
+ goto err;
+ }
+
+ name1_u64s = k.k->u64s;
+ name2_u64s = sizeof(struct bkey_i_snapshot_name) / sizeof(u64) +
+ DIV_ROUND_UP(qname2.len, sizeof(u64));
+
+ name1 = kzalloc(name1_u64s * sizeof(u64), GFP_KERNEL);
+ name2 = kzalloc(name2_u64s * sizeof(u64), GFP_KERNEL);
+ if (!name1 || !name2) {
+ bch_btree_iter_unlock(&iter_name1);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bkey_snapshot_node_init(&node1.k_i);
+ bkey_snapshot_node_init(&node2.k_i);
+
+ rcu_read_lock();
+ parent = bch_snapshot_entry(c, snapshot->id);
+
+ /*
+ * node1/name1 = existing snapshot, node2/name2 = new snapshot
+ *
+ * We assume that the existing snapshot will continue to have more
+ * snapshots taken, so give the current snapshot's new node the bulk of
+ * the parent's range:
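+	 *
+	 * e.g. a parent owning [0, 100) splits into node1 [0, 99) and
+	 * node2 [99, 100):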
+ */
+ node1.k.p = POS(0, node_ids[0]);
+ node1.v.parent = cpu_to_le64(snapshot->id);
+ node1.v.start = cpu_to_le64(parent->start);
+ node1.v.end = cpu_to_le64(parent->end - 1);
+
+ node2.k.p = POS(0, node_ids[1]);
+ node2.v.parent = cpu_to_le64(snapshot->id);
+ node2.v.start = cpu_to_le64(parent->end - 1);
+ node2.v.end = cpu_to_le64(parent->end);
+ rcu_read_unlock();
+
+ bkey_reassemble(&name1->k_i, k);
+ bch_btree_iter_unlock(&iter_name1);
+
+ name1->v.id = cpu_to_le64(node1.k.p.offset);
+
+ bkey_snapshot_name_init(&name2->k_i);
+ name2->k.u64s = name2_u64s;
+ name2->k.p.offset = bch_snapshot_hash(&info, &qname2);
+ name2->v.id = cpu_to_le64(node2.k.p.offset);
+ name2->v.ctime = cpu_to_le64(ktime_get_seconds());
+ name2->v.mtime = name2->v.ctime;
+ memcpy(name2->v.name, qname2.name, qname2.len);
+
+ bch_btree_iter_init_intent(&iter_node1, c, BTREE_ID_SNAPSHOT_TREE,
+ node1.k.p);
+ bch_btree_iter_init_intent(&iter_node2, c, BTREE_ID_SNAPSHOT_TREE,
+ node2.k.p);
+ bch_btree_iter_init_intent(&iter_name1, c, BTREE_ID_SNAPSHOT_NAMES,
+ name1->k.p);
+ bch_btree_iter_init_intent(&iter_name2, c, BTREE_ID_SNAPSHOT_NAMES,
+ name2->k.p);
+ bch_btree_iter_link(&iter_node1, &iter_node2);
+ bch_btree_iter_link(&iter_node2, &iter_name1);
+ bch_btree_iter_link(&iter_name1, &iter_name2);
+
+ ret = bch_btree_insert_trans(&(struct btree_insert_trans) {
+ .snapshot = SNAPSHOT_NONE,
+ .nr = 4,
+ .entries = (struct btree_trans_entry[]) {
+ { &iter_node1, &node1.k_i, },
+ { &iter_node2, &node2.k_i, },
+ { &iter_name1, &name1->k_i, },
+ { &iter_name2, &name2->k_i, },
+ }},
+ NULL, NULL, NULL, 0);
+ bch_btree_iter_unlock(&iter_node1);
+ bch_btree_iter_unlock(&iter_node2);
+ bch_btree_iter_unlock(&iter_name1);
+ bch_btree_iter_unlock(&iter_name2);
+
+ if (ret)
+ goto err;
+
+ /*
+ * Snapshot created, update handle to point to new snapshot:
+ */
+ snapshot->id = node1.k.p.offset;
+ /*
+ * after updating snapshot handle - so when we return all new index
+ * updates happen with the new snapshot id:
+ */
+ synchronize_rcu();
+err:
+ mutex_unlock(&c->snapshot_lock);
+ kfree(name1);
+ kfree(name2);
+ return ret;
+}
+
+int bch_snapshot_create_initial(struct cache_set *c)
+{
+ struct bch_hash_info info = { .type = c->sb.snapshot_str_hash_type };
+ struct qstr qname = QSTR_INIT("", 0);
+ struct bkey_i_snapshot_node node;
+ struct bkey_i_snapshot_name name;
+ int ret;
+
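+	/*
+	 * Node ids grow down from the maximum, so the root node gets
+	 * KEY_SNAPSHOT_MAX and the initial range [0, 1):
+	 */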
+ bkey_snapshot_node_init(&node.k_i);
+ node.k.p.offset = KEY_SNAPSHOT_MAX;
+ node.v.parent = cpu_to_le64(0);
+ node.v.start = cpu_to_le64(0);
+ node.v.end = cpu_to_le64(1);
+
+ ret = bch_btree_insert(c, SNAPSHOT_NONE, BTREE_ID_SNAPSHOT_TREE,
+ &node.k_i, NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+	bkey_snapshot_name_init(&name.k_i);
+ name.k.p.offset = bch_snapshot_hash(&info, &qname);
+ name.v.id = cpu_to_le64(node.k.p.offset);
+ name.v.ctime = cpu_to_le64(ktime_get_seconds());
+ name.v.mtime = name.v.ctime;
+
+ ret = bch_btree_insert(c, SNAPSHOT_NONE, BTREE_ID_SNAPSHOT_NAMES,
+ &name.k_i, NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+void bch_snapshot_close(struct cache_set *c, struct snapshot *snapshot)
+{
+ mutex_lock(&c->snapshot_lock);
+ if (atomic_long_dec_and_test(&snapshot->count)) {
+ list_del(&snapshot->list);
+		kfree(snapshot);
+	}
+ mutex_unlock(&c->snapshot_lock);
+}
+
+struct snapshot *bch_snapshot_open(struct cache_set *c, struct qstr name)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_i_snapshot_name *s = NULL;
+ struct bch_hash_info info = { .type = c->sb.snapshot_str_hash_type };
+ struct snapshot *snapshot;
+ size_t name_bytes;
+ int ret;
+
+ mutex_lock(&c->snapshot_lock);
+ k = bch_hash_lookup_intent(snapshot_hash_desc, &info, c,
+ c->snapshot_root, 0, &iter, &name);
+ if (IS_ERR(k.k)) {
+ snapshot = ERR_PTR(-ENOENT);
+ goto err;
+ }
+
+ /* For mtime update: */
+ s = kmalloc(k.k->u64s * sizeof(u64), GFP_KERNEL);
+ if (!s) {
+ snapshot = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+ bkey_reassemble(&s->k_i, k);
+
+ list_for_each_entry(snapshot, &c->snapshot_handles, list)
+		if (snapshot->id == le64_to_cpu(s->v.id)) {
+ atomic_long_inc(&snapshot->count);
+ goto found;
+ }
+
+ name_bytes = snapshot_name_bytes(bkey_s_c_to_snapshot_name(k));
+ snapshot = kmalloc(sizeof(*snapshot) + name_bytes + 1, GFP_KERNEL);
+ if (!snapshot) {
+ snapshot = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ list_add(&snapshot->list, &c->snapshot_handles);
+ atomic_long_set(&snapshot->count, 1);
+	snapshot->id = le64_to_cpu(s->v.id);
+ snapshot->root = NULL;
+ memcpy(snapshot->name, s->v.name, name_bytes);
+ snapshot->name[name_bytes] = '\0';
+
+ /*
+ * This is not ideal...
+ */
+ ret = bch_gc_inode_nlinks(c, snapshot);
+	if (ret) {
+		list_del(&snapshot->list);
+		kfree(snapshot);
+		snapshot = ERR_PTR(ret);
+		goto err;
+	}
+
+ bch_fsck(c, snapshot);
+found:
+ /* Update mtime: */
+ s->v.mtime = cpu_to_le64(ktime_get_seconds());
+ bch_btree_insert_at(SNAPSHOT_NONE, &iter, &s->k_i,
+ NULL, NULL, NULL, BTREE_INSERT_NOFAIL);
+err:
+ bch_btree_iter_unlock(&iter);
+ mutex_unlock(&c->snapshot_lock);
+ kfree(s);
+ return snapshot;
+}
+
+void bch_snapshots_exit(struct cache_set *c)
+{
+ if (c->snapshot_cache_init_done)
+ rhashtable_destroy(&c->snapshot_cache);
+}
+
+int bch_snapshots_init(struct cache_set *c)
+{
+ int ret;
+
+ ret = rhashtable_init(&c->snapshot_cache, &bch_snapshot_cache_params);
+ if (ret)
+ return ret;
+
+ c->snapshot_cache_init_done = true;
+ return 0;
+}
diff --git a/drivers/md/bcache/snapshot.h b/drivers/md/bcache/snapshot.h
new file mode 100644
index 000000000000..168f52271f05
--- /dev/null
+++ b/drivers/md/bcache/snapshot.h
@@ -0,0 +1,82 @@
+#ifndef _BCACHE_SNAPSHOT_H
+#define _BCACHE_SNAPSHOT_H
+
+#include <linux/rhashtable.h>
+
+/* Reference to an open snapshot: */
+struct snapshot {
+ struct list_head list;
+ atomic_long_t count;
+ u32 id;
+ struct dentry *root;
+ char name[];
+};
+
+struct snapshot_cached_entry {
+ struct rhash_head hash;
+ u32 id;
+ u32 parent;
+ u64 start;
+ u64 end;
+};
+
+static const struct rhashtable_params bch_snapshot_cache_params = {
+ .head_offset = offsetof(struct snapshot_cached_entry, hash),
+ .key_offset = offsetof(struct snapshot_cached_entry, id),
+ .key_len = sizeof(u32),
+};
+
+struct snapshot_cached_entry *__bch_snapshot_entry(struct cache_set *, u32);
+
+static inline struct snapshot_cached_entry *
+bch_snapshot_entry(struct cache_set *c, u32 id)
+{
+ return rhashtable_lookup_fast(&c->snapshot_cache, &id,
+ bch_snapshot_cache_params)
+ ?: __bch_snapshot_entry(c, id);
+}
+
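+/*
+ * Ancestry via interval containment: @snapshot is a descendant of
+ * @ancestor_id iff its [start, end) lies within the ancestor's: e.g. a
+ * node owning [0, 42) is a descendant of one owning [0, 99), but not
+ * of one owning [99, 100).
+ */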
+static inline bool bch_snapshot_is_descendant(struct cache_set *c,
+ struct snapshot *snapshot,
+ u32 ancestor_id)
+{
+ struct snapshot_cached_entry *child, *ancestor;
+ u32 child_id = snapshot->id;
+ bool ret;
+
+ if (likely(child_id == ancestor_id))
+ return true;
+
+ mutex_lock(&c->snapshot_lock);
+ rcu_read_lock();
+	child = bch_snapshot_entry(c, child_id);
+	ancestor = bch_snapshot_entry(c, ancestor_id);
+	if (unlikely(!child || !ancestor)) {
+		/* Cache lookup failed: be conservative, not a descendant */
+		rcu_read_unlock();
+		mutex_unlock(&c->snapshot_lock);
+		return false;
+	}
+
+ ret = child->start >= ancestor->start &&
+ child->end <= ancestor->end;
+ rcu_read_unlock();
+ mutex_unlock(&c->snapshot_lock);
+
+ return ret;
+}
+
+int bch_snapshot_create(struct cache_set *, struct snapshot *, const char *);
+int bch_snapshot_create_initial(struct cache_set *);
+
+void bch_snapshot_close(struct cache_set *, struct snapshot *);
+struct snapshot *bch_snapshot_open(struct cache_set *, struct qstr);
+
+extern const struct btree_keys_ops bch_snapshot_name_ops;
+extern const struct bkey_ops bch_bkey_snapshot_name_ops;
+extern const struct btree_keys_ops bch_snapshot_tree_ops;
+extern const struct bkey_ops bch_bkey_snapshot_tree_ops;
+
+void bch_snapshots_exit(struct cache_set *);
+int bch_snapshots_init(struct cache_set *);
+
+#endif /* _BCACHE_SNAPSHOT_H */
diff --git a/drivers/md/bcache/str_hash.h b/drivers/md/bcache/str_hash.h
index d296e9e92894..eb8570f36cfb 100644
--- a/drivers/md/bcache/str_hash.h
+++ b/drivers/md/bcache/str_hash.h
@@ -106,12 +106,13 @@ struct bch_hash_desc {
static inline struct bkey_s_c
bch_hash_lookup_at(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
+ struct snapshot *snapshot,
struct btree_iter *iter, const void *search)
{
struct bkey_s_c k;
u64 inode = iter->pos.inode;
- while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ while ((k = bch_btree_iter_peek_snapshot_with_holes(iter, snapshot)).k) {
if (k.k->p.inode != inode)
break;
@@ -134,12 +135,13 @@ bch_hash_lookup_at(const struct bch_hash_desc desc,
static inline struct bkey_s_c
bch_hash_lookup_bkey_at(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
+ struct snapshot *snapshot,
struct btree_iter *iter, struct bkey_s_c search)
{
struct bkey_s_c k;
u64 inode = iter->pos.inode;
- while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ while ((k = bch_btree_iter_peek_snapshot_with_holes(iter, snapshot)).k) {
if (k.k->p.inode != inode)
break;
@@ -162,34 +164,35 @@ bch_hash_lookup_bkey_at(const struct bch_hash_desc desc,
static inline struct bkey_s_c
bch_hash_lookup(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
- struct cache_set *c, u64 inode,
- struct btree_iter *iter, const void *key)
+ struct cache_set *c, struct snapshot *snapshot,
+ u64 inode, struct btree_iter *iter, const void *key)
{
bch_btree_iter_init(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
- return bch_hash_lookup_at(desc, info, iter, key);
+ return bch_hash_lookup_at(desc, info, snapshot, iter, key);
}
static inline struct bkey_s_c
bch_hash_lookup_intent(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
- struct cache_set *c, u64 inode,
- struct btree_iter *iter, const void *key)
+ struct cache_set *c, struct snapshot *snapshot,
+ u64 inode, struct btree_iter *iter, const void *key)
{
bch_btree_iter_init_intent(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
- return bch_hash_lookup_at(desc, info, iter, key);
+ return bch_hash_lookup_at(desc, info, snapshot, iter, key);
}
static inline struct bkey_s_c
-bch_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter)
+bch_hash_hole_at(const struct bch_hash_desc desc, struct snapshot *snapshot,
+ struct btree_iter *iter)
{
struct bkey_s_c k;
u64 inode = iter->pos.inode;
- while ((k = bch_btree_iter_peek_with_holes(iter)).k) {
+ while ((k = bch_btree_iter_peek_snapshot_with_holes(iter, snapshot)).k) {
if (k.k->p.inode != inode)
break;
@@ -205,18 +208,21 @@ bch_hash_hole_at(const struct bch_hash_desc desc, struct btree_iter *iter)
static inline struct bkey_s_c bch_hash_hole(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
- struct cache_set *c, u64 inode,
+ struct cache_set *c,
+ struct snapshot *snapshot,
+ u64 inode,
struct btree_iter *iter,
const void *key)
{
bch_btree_iter_init_intent(iter, c, desc.btree_id,
POS(inode, desc.hash_key(info, key)));
- return bch_hash_hole_at(desc, iter);
+ return bch_hash_hole_at(desc, snapshot, iter);
}
static inline bool bch_hash_needs_whiteout(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
+ struct snapshot *snapshot,
struct btree_iter *iter,
struct btree_iter *start)
{
@@ -226,7 +232,7 @@ static inline bool bch_hash_needs_whiteout(const struct bch_hash_desc desc,
while (1) {
bch_btree_iter_advance_pos(iter);
- k = bch_btree_iter_peek_with_holes(iter);
+ k = bch_btree_iter_peek_snapshot_with_holes(iter, snapshot);
if (!k.k)
return iter->error ? true : false;
@@ -246,9 +252,10 @@ static inline bool bch_hash_needs_whiteout(const struct bch_hash_desc desc,
static inline int bch_hash_set(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
- struct cache_set *c, u64 inode,
- u64 *journal_seq,
- struct bkey_i *insert, int flags)
+ struct cache_set *c,
+ struct snapshot *snapshot,
+ u64 inode, struct bkey_i *insert,
+ u64 *journal_seq, int flags)
{
struct btree_iter iter, hashed_slot;
struct bkey_s_c k;
@@ -271,7 +278,7 @@ static inline int bch_hash_set(const struct bch_hash_desc desc,
* while we dropped locks:
*/
bch_btree_iter_copy(&iter, &hashed_slot);
- k = bch_hash_lookup_bkey_at(desc, info, &iter,
+ k = bch_hash_lookup_bkey_at(desc, info, snapshot, &iter,
bkey_i_to_s_c(insert));
if (IS_ERR(k.k)) {
if (flags & BCH_HASH_SET_MUST_REPLACE) {
@@ -286,7 +293,7 @@ static inline int bch_hash_set(const struct bch_hash_desc desc,
* slot we hashed to:
*/
bch_btree_iter_copy(&iter, &hashed_slot);
- k = bch_hash_hole_at(desc, &iter);
+ k = bch_hash_hole_at(desc, snapshot, &iter);
if (IS_ERR(k.k)) {
ret = PTR_ERR(k.k);
goto err;
@@ -299,7 +306,7 @@ static inline int bch_hash_set(const struct bch_hash_desc desc,
}
insert->k.p = iter.pos;
- ret = bch_btree_insert_at(&iter, insert, NULL, NULL,
+ ret = bch_btree_insert_at(snapshot, &iter, insert, NULL, NULL,
journal_seq, BTREE_INSERT_ATOMIC);
/* unlock before traversing hashed_slot: */
@@ -321,8 +328,9 @@ err:
static inline int bch_hash_delete(const struct bch_hash_desc desc,
const struct bch_hash_info *info,
- struct cache_set *c, u64 inode,
- u64 *journal_seq, const void *key)
+ struct cache_set *c,
+ struct snapshot *snapshot,
+ u64 inode, u64 *journal_seq, const void *key)
{
struct btree_iter iter, whiteout_iter;
struct bkey_s_c k;
@@ -333,18 +341,18 @@ static inline int bch_hash_delete(const struct bch_hash_desc desc,
POS(inode, desc.hash_key(info, key)));
do {
- k = bch_hash_lookup_at(desc, info, &iter, key);
+ k = bch_hash_lookup_at(desc, info, snapshot, &iter, key);
if (IS_ERR(k.k))
return bch_btree_iter_unlock(&iter) ?: -ENOENT;
bkey_init(&delete.k);
delete.k.p = k.k->p;
delete.k.type = bch_hash_needs_whiteout(desc, info,
- &whiteout_iter, &iter)
+ snapshot, &whiteout_iter, &iter)
? desc.whiteout_type
: KEY_TYPE_DELETED;
- ret = bch_btree_insert_at(&iter, &delete,
+ ret = bch_btree_insert_at(snapshot, &iter, &delete,
NULL, NULL, journal_seq,
BTREE_INSERT_NOFAIL|
BTREE_INSERT_ATOMIC);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index ddadcd591071..2a3c2ab0bd04 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -16,7 +16,6 @@
#include "clock.h"
#include "debug.h"
#include "error.h"
-#include "fs-gc.h"
#include "inode.h"
#include "io.h"
#include "journal.h"
@@ -25,6 +24,7 @@
#include "migrate.h"
#include "movinggc.h"
#include "notify.h"
+#include "snapshot.h"
#include "stats.h"
#include "super.h"
#include "tier.h"
@@ -901,6 +901,7 @@ static void cache_set_free(struct cache_set *c)
cancel_work_sync(&c->bio_submit_work);
cancel_work_sync(&c->read_retry_work);
+ bch_snapshots_exit(c);
bch_bset_sort_state_free(&c->sort);
bch_btree_cache_free(c);
bch_journal_free(&c->journal);
@@ -964,6 +965,10 @@ static void __cache_set_stop3(struct closure *cl)
struct cache *ca;
unsigned i;
+ if (c->snapshot_root)
+ bch_snapshot_close(c, c->snapshot_root);
+ c->snapshot_root = NULL;
+
mutex_lock(&bch_register_lock);
for_each_cache(ca, c, i)
bch_cache_stop(ca);
@@ -1117,6 +1122,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
spin_lock_init(&c->read_retry_lock);
INIT_WORK(&c->read_retry_work, bch_read_retry_work);
+ INIT_LIST_HEAD(&c->snapshot_handles);
+ mutex_init(&c->snapshot_lock);
+
seqcount_init(&c->gc_pos_lock);
c->prio_clock[READ].hand = 1;
@@ -1189,7 +1197,8 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
bch_journal_alloc(&c->journal) ||
bch_btree_cache_alloc(c) ||
bch_bset_sort_state_init(&c->sort, ilog2(btree_pages(c)),
- &c->btree_sort_time))
+ &c->btree_sort_time) ||
+ bch_snapshots_init(c))
goto err;
for_each_possible_cpu(cpu) {
@@ -1269,6 +1278,7 @@ static int bch_cache_set_online(struct cache_set *c)
static const char *run_cache_set(struct cache_set *c)
{
const char *err = "cannot allocate memory";
+ struct snapshot *snapshot;
struct cache *ca;
unsigned i, id;
time64_t now;
@@ -1366,11 +1376,13 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_journal_replay(c, &journal))
goto err;
- err = "error gcing inode nlinks";
- if (bch_gc_inode_nlinks(c))
+
+ err = "error opening root snapshot";
+ snapshot = bch_snapshot_open(c, (struct qstr) { 0 });
+ if (IS_ERR(snapshot))
goto err;
- bch_fsck(c);
+ c->snapshot_root = snapshot;
} else {
struct bkey_i_inode inode;
struct closure cl;
@@ -1412,6 +1424,17 @@ static const char *run_cache_set(struct cache_set *c)
/* Wait for new btree roots to be written: */
closure_sync(&cl);
+ err = "error creating initial snapshot";
+ if (bch_snapshot_create_initial(c))
+ goto err;
+
+ err = "error opening root snapshot";
+ snapshot = bch_snapshot_open(c, (struct qstr) { 0 });
+ if (IS_ERR(snapshot))
+ goto err;
+
+ c->snapshot_root = snapshot;
+
bkey_inode_init(&inode.k_i);
inode.k.p.inode = BCACHE_ROOT_INO;
inode.v.i_mode = cpu_to_le16(S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO);
@@ -1420,8 +1443,8 @@ static const char *run_cache_set(struct cache_set *c)
SET_INODE_STR_HASH_TYPE(&inode.v, c->sb.str_hash_type);
err = "error creating root directory";
- if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i,
- NULL, NULL, NULL, 0))
+ if (bch_btree_insert(c, c->snapshot_root, BTREE_ID_INODES,
+ &inode.k_i, NULL, NULL, NULL, 0))
goto err;
err = "error writing first journal entry";
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 1652c09b7fb6..7b3f57dbeb6d 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -234,7 +234,6 @@ STORE(__cached_dev)
struct cached_dev *dc = container_of(kobj, struct cached_dev,
disk.kobj);
unsigned v = size;
- struct cache_set *c;
struct kobj_uevent_env *env;
#define d_strtoul(var) sysfs_strtoul(var, dc->var)
@@ -291,7 +290,9 @@ STORE(__cached_dev)
bch_write_bdev_super(dc, NULL);
if (dc->disk.c)
- ret = bch_inode_update(dc->disk.c, &dc->disk.inode.k_i,
+ ret = bch_inode_update(dc->disk.c,
+ dc->disk.c->snapshot_root,
+ &dc->disk.inode.k_i,
&journal_seq);
mutex_unlock(&dc->disk.inode_lock);
@@ -317,6 +318,8 @@ STORE(__cached_dev)
}
if (attr == &sysfs_attach) {
+ struct cache_set *c;
+
if (uuid_parse(buf, &dc->disk_sb.sb->user_uuid))
return -EINVAL;
@@ -415,7 +418,8 @@ STORE(__bch_blockdev_volume)
mutex_lock(&d->inode_lock);
if (v < le64_to_cpu(d->inode.v.i_size) ){
- ret = bch_inode_truncate(d->c, d->inode.k.p.inode,
+ ret = bch_inode_truncate(d->c, d->c->snapshot_root,
+ d->inode.k.p.inode,
v >> 9, NULL, NULL);
if (ret) {
mutex_unlock(&d->inode_lock);
@@ -423,7 +427,8 @@ STORE(__bch_blockdev_volume)
}
}
d->inode.v.i_size = cpu_to_le64(v);
- ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
+ ret = bch_inode_update(d->c, d->c->snapshot_root,
+ &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
@@ -444,7 +449,8 @@ STORE(__bch_blockdev_volume)
mutex_lock(&d->inode_lock);
memcpy(d->inode.v.i_label, buf, SB_LABEL_SIZE);
- ret = bch_inode_update(d->c, &d->inode.k_i, &journal_seq);
+ ret = bch_inode_update(d->c, d->c->snapshot_root,
+ &d->inode.k_i, &journal_seq);
mutex_unlock(&d->inode_lock);
diff --git a/drivers/md/bcache/tier.c b/drivers/md/bcache/tier.c
index caf6b3df2c9c..6366a80a3eaf 100644
--- a/drivers/md/bcache/tier.c
+++ b/drivers/md/bcache/tier.c
@@ -222,7 +222,7 @@ static int issue_tiering_move(struct moving_queue *q,
bch_replace_init(&io->replace, bkey_i_to_s_c(&io->key));
- bch_write_op_init(&io->op, c, &io->wbio,
+ bch_write_op_init(&io->op, c, SNAPSHOT_NONE, &io->wbio,
(struct disk_reservation) { 0 },
&ca->tiering_write_point,
bkey_i_to_s_c(&io->key),
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3db1f0827ca3..816011d1afa3 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -118,7 +118,8 @@ static void write_dirty_finish(struct closure *cl)
io->replace.hook.fn = bch_extent_cmpxchg;
bkey_extent_set_cached(&tmp.k.k, true);
- ret = bch_btree_insert(dc->disk.c, BTREE_ID_EXTENTS, &tmp.k,
+ ret = bch_btree_insert(dc->disk.c, SNAPSHOT_NONE,
+ BTREE_ID_EXTENTS, &tmp.k,
NULL, &io->replace.hook, NULL, 0);
if (io->replace.successes == 0)
trace_bcache_writeback_collision(&io->replace.key.k);
diff --git a/drivers/md/bcache/xattr.c b/drivers/md/bcache/xattr.c
index bfaf2875e229..d47470398085 100644
--- a/drivers/md/bcache/xattr.c
+++ b/drivers/md/bcache/xattr.c
@@ -153,7 +153,7 @@ int bch_xattr_get(struct inode *inode, const char *name,
int ret;
k = bch_hash_lookup(xattr_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &iter,
+ ei->snapshot, ei->vfs_inode.i_ino, &iter,
&X_SEARCH(type, name, strlen(name)));
if (IS_ERR(k.k))
return bch_btree_iter_unlock(&iter) ?: -ENODATA;
@@ -181,8 +181,8 @@ int bch_xattr_set(struct inode *inode, const char *name,
int ret;
if (!value) {
- ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash,
- c, ei->vfs_inode.i_ino,
+ ret = bch_hash_delete(xattr_hash_desc, &ei->str_hash, c,
+ ei->snapshot, ei->vfs_inode.i_ino,
&ei->journal_seq, &search);
} else {
struct bkey_i_xattr *xattr;
@@ -207,8 +207,8 @@ int bch_xattr_set(struct inode *inode, const char *name,
memcpy(xattr_val(&xattr->v), value, size);
ret = bch_hash_set(xattr_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &ei->journal_seq,
- &xattr->k_i,
+ ei->snapshot, ei->vfs_inode.i_ino,
+ &xattr->k_i, &ei->journal_seq,
(flags & XATTR_CREATE ? BCH_HASH_SET_MUST_CREATE : 0)|
(flags & XATTR_REPLACE ? BCH_HASH_SET_MUST_REPLACE : 0));
kfree(xattr);
@@ -249,15 +249,18 @@ static size_t bch_xattr_emit(struct dentry *dentry,
ssize_t bch_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
+ struct bch_inode_info *ei = to_bch_ei(inode);
struct cache_set *c = dentry->d_sb->s_fs_info;
struct btree_iter iter;
struct bkey_s_c k;
const struct bch_xattr *xattr;
- u64 inum = dentry->d_inode->i_ino;
+ u64 inum = inode->i_ino;
ssize_t ret = 0;
size_t len;
- for_each_btree_key(&iter, c, BTREE_ID_XATTRS, POS(inum, 0), k) {
+ for_each_snapshot_key(&iter, c, ei->snapshot, BTREE_ID_XATTRS,
+ POS(inum, 0), k) {
BUG_ON(k.k->p.inode < inum);
if (k.k->p.inode > inum)
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 6006d50418cf..10b8392c0669 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -613,6 +613,42 @@ struct bch_xattr {
} __attribute__((packed));
BKEY_VAL_TYPE(xattr, BCH_XATTR);
+/* Snapshots */
+
+enum {
+ BCH_SNAPSHOT_NAME = 128,
+ BCH_SNAPSHOT_NAME_WHITEOUT = 129,
+};
+
+struct bch_snapshot_name {
+ struct bch_val v;
+ __le64 id;
+ __le64 flags;
+ /* Seconds: */
+ __le64 ctime;
+ __le64 mtime;
+ __u8 name[];
+} __attribute__((packed));
+BKEY_VAL_TYPE(snapshot_name, BCH_SNAPSHOT_NAME);
+
+enum {
+ BCH_SNAPSHOT_NODE = 128,
+};
+
+struct bch_snapshot_node {
+ struct bch_val v;
+ __le64 parent;
+
+	/*
+	 * Denotes a half open interval: the intervals of all descendants of
+	 * this node lie within this node's [start, end).
+	 *
+	 * Note that a child node's interval can share an endpoint with this
+	 * node's.
+	 */
+ __le64 start;
+ __le64 end;
+} __attribute__((packed));
+BKEY_VAL_TYPE(snapshot_node, BCH_SNAPSHOT_NODE);
+
/* Superblock */
/* Version 0: Cache device
@@ -1073,10 +1109,12 @@ LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4);
/* Btree: */
#define DEFINE_BCH_BTREE_IDS() \
- DEF_BTREE_ID(EXTENTS, 0, "extents") \
- DEF_BTREE_ID(INODES, 1, "inodes") \
- DEF_BTREE_ID(DIRENTS, 2, "dirents") \
- DEF_BTREE_ID(XATTRS, 3, "xattrs")
+ DEF_BTREE_ID(EXTENTS, 0, "extents") \
+ DEF_BTREE_ID(INODES, 1, "inodes") \
+ DEF_BTREE_ID(DIRENTS, 2, "dirents") \
+ DEF_BTREE_ID(XATTRS, 3, "xattrs") \
+ DEF_BTREE_ID(SNAPSHOT_NAMES, 4, "snapshot_names") \
+ DEF_BTREE_ID(SNAPSHOT_TREE, 5, "snapshot_tree")
#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val,