Diffstat (limited to 'libbcache')
-rw-r--r--  libbcache/alloc.c          |    3
-rw-r--r--  libbcache/bcache.h         |    5
-rw-r--r--  libbcache/bkey_methods.c   |   10
-rw-r--r--  libbcache/bkey_methods.h   |    2
-rw-r--r--  libbcache/btree_cache.c    |   56
-rw-r--r--  libbcache/btree_cache.h    |   10
-rw-r--r--  libbcache/btree_io.c       |    7
-rw-r--r--  libbcache/btree_types.h    |    3
-rw-r--r--  libbcache/btree_update.h   |    7
-rw-r--r--  libbcache/debug.c          |   60
-rw-r--r--  libbcache/dirent.c         |   49
-rw-r--r--  libbcache/dirent.h         |   12
-rw-r--r--  libbcache/error.h          |   63
-rw-r--r--  libbcache/extents.c        |    7
-rw-r--r--  libbcache/extents.h        |    9
-rw-r--r--  libbcache/fs-gc.c          |  710
-rw-r--r--  libbcache/fs-gc.h          |    3
-rw-r--r--  libbcache/fs.c             |   45
-rw-r--r--  libbcache/inode.c          |   32
-rw-r--r--  libbcache/inode.h          |    2
-rw-r--r--  libbcache/io.c             |    5
-rw-r--r--  libbcache/journal.c        |   37
-rw-r--r--  libbcache/movinggc.c       |   16
-rw-r--r--  libbcache/opts.c           |   46
-rw-r--r--  libbcache/opts.h           |   21
-rw-r--r--  libbcache/str_hash.h       |    8
-rw-r--r--  libbcache/super.c          |  117
-rw-r--r--  libbcache/sysfs.c          |   29
-rw-r--r--  libbcache/tier.c           |    3
29 files changed, 959 insertions, 418 deletions
diff --git a/libbcache/alloc.c b/libbcache/alloc.c
index cff750c..4fe08b5 100644
--- a/libbcache/alloc.c
+++ b/libbcache/alloc.c
@@ -254,6 +254,9 @@ static int bch_prio_write(struct cache *ca)
bool need_new_journal_entry;
int i, ret;
+ if (c->opts.nochanges)
+ return 0;
+
trace_bcache_prio_write_start(ca);
atomic64_add(ca->mi.bucket_size * prio_buckets(ca),
diff --git a/libbcache/bcache.h b/libbcache/bcache.h
index 9a43a69..309d372 100644
--- a/libbcache/bcache.h
+++ b/libbcache/bcache.h
@@ -210,8 +210,9 @@
#define bch_meta_write_fault(name) \
dynamic_fault("bcache:meta:write:" name)
-#define bch_fmt(_c, fmt) \
- "bcache (%s): " fmt "\n", ((_c)->name)
+#ifndef bch_fmt
+#define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name)
+#endif
#define bch_info(c, fmt, ...) \
printk(KERN_INFO bch_fmt(c, fmt), ##__VA_ARGS__)
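
The new #ifndef guard around bch_fmt lets a build predefine the macro before this header is pulled in, presumably so the userspace libbcache build can redirect every bch_info()/bch_err() call site without patching them. A minimal standalone sketch of that override pattern (the cache_set stub and main() are illustrative, not from the tree):

    #include <stdio.h>

    /* user override, defined before the header would be included: */
    #define bch_fmt(_c, fmt) "[%s] " fmt "\n", ((_c)->name)

    /* what the header provides when no override exists: */
    #ifndef bch_fmt
    #define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name)
    #endif

    #define bch_info(c, fmt, ...) printf(bch_fmt(c, fmt), ##__VA_ARGS__)

    struct cache_set { const char *name; };

    int main(void)
    {
        struct cache_set c = { .name = "sda1" };
        bch_info(&c, "mounted in %u ms", 42); /* prints "[sda1] mounted in 42 ms" */
        return 0;
    }
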
diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c
index 3bcd0e0..90f7e5f 100644
--- a/libbcache/bkey_methods.c
+++ b/libbcache/bkey_methods.c
@@ -89,6 +89,16 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k)
ops->key_debugcheck(c, b, k);
}
+void bch_val_to_text(struct cache_set *c, enum bkey_type type,
+ char *buf, size_t size, struct bkey_s_c k)
+{
+ const struct bkey_ops *ops = bch_bkey_ops[type];
+
+ if (k.k->type >= KEY_TYPE_GENERIC_NR &&
+ ops->val_to_text)
+ ops->val_to_text(c, buf, size, k);
+}
+
void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type,
char *buf, size_t size, struct bkey_s_c k)
{
diff --git a/libbcache/bkey_methods.h b/libbcache/bkey_methods.h
index 0e305eb..c1f0dc5 100644
--- a/libbcache/bkey_methods.h
+++ b/libbcache/bkey_methods.h
@@ -67,6 +67,8 @@ const char *btree_bkey_invalid(struct cache_set *, struct btree *,
struct bkey_s_c);
void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c);
+void bch_val_to_text(struct cache_set *, enum bkey_type,
+ char *, size_t, struct bkey_s_c);
void bch_bkey_val_to_text(struct cache_set *, enum bkey_type,
char *, size_t, struct bkey_s_c);
diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c
index 0994190..ca6064a 100644
--- a/libbcache/btree_cache.c
+++ b/libbcache/btree_cache.c
@@ -149,7 +149,8 @@ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush)
if (!six_trylock_write(&b->lock))
goto out_unlock_intent;
- if (btree_node_write_error(b))
+ if (btree_node_write_error(b) ||
+ btree_node_noevict(b))
goto out_unlock;
if (!list_empty(&b->write_blocked))
@@ -699,3 +700,56 @@ retry:
return b;
}
+
+int bch_print_btree_node(struct cache_set *c, struct btree *b,
+ char *buf, size_t len)
+{
+ const struct bkey_format *f = &b->format;
+ struct bset_stats stats;
+ char ptrs[100];
+
+ memset(&stats, 0, sizeof(stats));
+
+ bch_val_to_text(c, BKEY_TYPE_BTREE, ptrs, sizeof(ptrs),
+ bkey_i_to_s_c(&b->key));
+ bch_btree_keys_stats(b, &stats);
+
+ return scnprintf(buf, len,
+ "l %u %llu:%llu - %llu:%llu:\n"
+ " ptrs: %s\n"
+ " format: u64s %u fields %u %u %u %u %u\n"
+ " unpack fn len: %u\n"
+ " bytes used %zu/%zu (%zu%% full)\n"
+ " sib u64s: %u, %u (merge threshold %zu)\n"
+ " nr packed keys %u\n"
+ " nr unpacked keys %u\n"
+ " floats %zu\n"
+ " failed unpacked %zu\n"
+ " failed prev %zu\n"
+ " failed overflow %zu\n",
+ b->level,
+ b->data->min_key.inode,
+ b->data->min_key.offset,
+ b->data->max_key.inode,
+ b->data->max_key.offset,
+ ptrs,
+ f->key_u64s,
+ f->bits_per_field[0],
+ f->bits_per_field[1],
+ f->bits_per_field[2],
+ f->bits_per_field[3],
+ f->bits_per_field[4],
+ b->unpack_fn_len,
+ b->nr.live_u64s * sizeof(u64),
+ btree_bytes(c) - sizeof(struct btree_node),
+ b->nr.live_u64s * 100 / btree_max_u64s(c),
+ b->sib_u64s[0],
+ b->sib_u64s[1],
+ BTREE_FOREGROUND_MERGE_THRESHOLD(c),
+ b->nr.packed_keys,
+ b->nr.unpacked_keys,
+ stats.floats,
+ stats.failed_unpacked,
+ stats.failed_prev,
+ stats.failed_overflow);
+}
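
bch_print_btree_node() reports its output length with scnprintf(), which unlike snprintf() returns the number of characters actually written rather than the number that would have been written, so the i->bytes bookkeeping in debug.c can never exceed the buffer. A userspace approximation of that difference (my_scnprintf is a hypothetical stand-in for the kernel helper):

    #include <stdarg.h>
    #include <stddef.h>
    #include <stdio.h>

    /* stand-in for the kernel's scnprintf(): clamp to what actually fit */
    static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
    {
        va_list args;
        int n;

        va_start(args, fmt);
        n = vsnprintf(buf, size, fmt, args);
        va_end(args);

        if (n >= (int) size)
            n = size ? (int) size - 1 : 0;
        return n;
    }

    int main(void)
    {
        char buf[8];
        int a = snprintf(buf, sizeof(buf), "0123456789");     /* returns 10 */
        int b = my_scnprintf(buf, sizeof(buf), "0123456789"); /* returns 7  */
        printf("snprintf=%d scnprintf=%d\n", a, b);
        return 0;
    }
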
diff --git a/libbcache/btree_cache.h b/libbcache/btree_cache.h
index e745abb..c26489d 100644
--- a/libbcache/btree_cache.h
+++ b/libbcache/btree_cache.h
@@ -56,6 +56,16 @@ static inline unsigned btree_blocks(struct cache_set *c)
return c->sb.btree_node_size >> c->block_bits;
}
+#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4)
+
+#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
+#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
+ (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
+
#define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b)
+int bch_print_btree_node(struct cache_set *, struct btree *,
+ char *, size_t);
+
#endif /* _BCACHE_BTREE_CACHE_H */
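
Hoisting these macros out of btree_update.h and next to btree_blocks()/btree_max_u64s() keeps the node-sizing policy in one header: a node splits at 3/4 of its blocks, becomes a foreground merge candidate below 1/3 of its key capacity, and the hysteresis value works out to five times that threshold, since t + (t << 2) is t + 4t. A worked example with made-up capacities:

    #include <stdio.h>

    /* hypothetical capacities, not taken from any real superblock */
    #define BTREE_BLOCKS        64u     /* blocks per btree node      */
    #define BTREE_MAX_U64S      1500u   /* u64s of key space per node */

    #define SPLIT_THRESHOLD     (BTREE_BLOCKS * 3 / 4)
    #define MERGE_THRESHOLD     (BTREE_MAX_U64S * 1 / 3)
    #define MERGE_HYSTERESIS    (MERGE_THRESHOLD + (MERGE_THRESHOLD << 2))

    int main(void)
    {
        printf("split at %u blocks\n", SPLIT_THRESHOLD);         /* 48   */
        printf("merge below %u u64s\n", MERGE_THRESHOLD);        /* 500  */
        printf("hysteresis %u u64s (5x)\n", MERGE_HYSTERESIS);   /* 2500 */
        return 0;
    }
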
diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c
index ff976b5..4c295af 100644
--- a/libbcache/btree_io.c
+++ b/libbcache/btree_io.c
@@ -200,7 +200,7 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst,
const struct bkey_format *f = &iter->b->format;
struct bkey_packed *in, *out = dst;
struct bkey_i l, r;
- bool prev = false, l_packed;
+ bool prev = false, l_packed = false;
u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE);
u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET);
u64 new_size;
@@ -1443,8 +1443,9 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b,
* Make sure to update b->written so bch_btree_init_next() doesn't
* break:
*/
- if (bch_journal_error(&c->journal)) {
- set_btree_node_write_error(b);
+ if (bch_journal_error(&c->journal) ||
+ c->opts.nochanges) {
+ set_btree_node_noevict(b);
b->written += sectors_to_write;
btree_bounce_free(c, order, used_mempool, data);
diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h
index 3632a04..176d42a 100644
--- a/libbcache/btree_types.h
+++ b/libbcache/btree_types.h
@@ -2,6 +2,7 @@
#define _BCACHE_BTREE_TYPES_H
#include <linux/bcache.h>
+#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rhashtable.h>
#include <linux/semaphore.h>
@@ -138,6 +139,7 @@ enum btree_flags {
BTREE_NODE_read_error,
BTREE_NODE_write_error,
BTREE_NODE_dirty,
+ BTREE_NODE_noevict,
BTREE_NODE_write_idx,
BTREE_NODE_accessed,
BTREE_NODE_write_in_flight,
@@ -147,6 +149,7 @@ enum btree_flags {
BTREE_FLAG(read_error);
BTREE_FLAG(write_error);
BTREE_FLAG(dirty);
+BTREE_FLAG(noevict);
BTREE_FLAG(write_idx);
BTREE_FLAG(accessed);
BTREE_FLAG(write_in_flight);
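
BTREE_NODE_noevict joins the flag enum, and BTREE_FLAG(noevict) generates the btree_node_noevict()/set_btree_node_noevict() accessors used by the btree_cache.c and btree_io.c hunks above. The generator macro itself is outside this hunk; a self-contained sketch of its assumed shape (the real one would use kernel bitops on b->flags):

    #include <stdbool.h>
    #include <stdio.h>

    struct btree { unsigned long flags; };

    enum btree_flags { BTREE_NODE_dirty, BTREE_NODE_noevict };

    /* assumed shape of the generator macro: */
    #define BTREE_FLAG(flag)                                        \
    static bool btree_node_##flag(struct btree *b)                  \
    { return b->flags & (1UL << BTREE_NODE_##flag); }               \
    static void set_btree_node_##flag(struct btree *b)              \
    { b->flags |= 1UL << BTREE_NODE_##flag; }

    BTREE_FLAG(noevict)

    int main(void)
    {
        struct btree b = { 0 };

        set_btree_node_noevict(&b);
        printf("noevict=%d\n", btree_node_noevict(&b)); /* noevict=1 */
        return 0;
    }
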
diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h
index 0154441..5fc1b1a 100644
--- a/libbcache/btree_update.h
+++ b/libbcache/btree_update.h
@@ -11,13 +11,6 @@ struct bkey_format_state;
struct bkey_format;
struct btree;
-#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4)
-
-#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3)
-#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \
- (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \
- (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2))
-
static inline void btree_node_reset_sib_u64s(struct btree *b)
{
b->sib_u64s[0] = b->nr.live_u64s;
diff --git a/libbcache/debug.c b/libbcache/debug.c
index 1be2e60..39f5550 100644
--- a/libbcache/debug.c
+++ b/libbcache/debug.c
@@ -46,6 +46,9 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b)
struct bio *bio;
struct closure cl;
+ if (c->opts.nochanges)
+ return;
+
closure_init_stack(&cl);
btree_node_io_lock(b);
@@ -296,55 +299,6 @@ static const struct file_operations btree_debug_ops = {
.read = bch_read_btree,
};
-static int print_btree_node(struct dump_iter *i, struct btree *b)
-{
- const struct bkey_format *f = &b->format;
- struct bset_stats stats;
-
- memset(&stats, 0, sizeof(stats));
-
- bch_btree_keys_stats(b, &stats);
-
- i->bytes = scnprintf(i->buf, sizeof(i->buf),
- "l %u %llu:%llu - %llu:%llu:\n"
- " format: u64s %u fields %u %u %u %u %u\n"
- " unpack fn len: %u\n"
- " bytes used %zu/%zu (%zu%% full)\n"
- " sib u64s: %u, %u (merge threshold %zu)\n"
- " nr packed keys %u\n"
- " nr unpacked keys %u\n"
- " floats %zu\n"
- " failed unpacked %zu\n"
- " failed prev %zu\n"
- " failed overflow %zu\n",
- b->level,
- b->data->min_key.inode,
- b->data->min_key.offset,
- b->data->max_key.inode,
- b->data->max_key.offset,
- f->key_u64s,
- f->bits_per_field[0],
- f->bits_per_field[1],
- f->bits_per_field[2],
- f->bits_per_field[3],
- f->bits_per_field[4],
- b->unpack_fn_len,
- b->nr.live_u64s * sizeof(u64),
- btree_bytes(i->c) - sizeof(struct btree_node),
- b->nr.live_u64s * 100 / btree_max_u64s(i->c),
- b->sib_u64s[0],
- b->sib_u64s[1],
- BTREE_FOREGROUND_MERGE_THRESHOLD(i->c),
- b->nr.packed_keys,
- b->nr.unpacked_keys,
- stats.floats,
- stats.failed_unpacked,
- stats.failed_prev,
- stats.failed_overflow);
-
- return flush_buf(i);
-}
-
static ssize_t bch_read_btree_formats(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -365,7 +319,9 @@ static ssize_t bch_read_btree_formats(struct file *file, char __user *buf,
return i->ret;
for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) {
- err = print_btree_node(i, b);
+ i->bytes = bch_print_btree_node(i->c, b, i->buf,
+ sizeof(i->buf));
+ err = flush_buf(i);
if (err)
break;
@@ -421,7 +377,9 @@ static ssize_t bch_read_bfloat_failed(struct file *file, char __user *buf,
struct bkey_packed *_k = bch_btree_node_iter_peek(node_iter, b);
if (iter.nodes[0] != prev_node) {
- err = print_btree_node(i, iter.nodes[0]);
+ i->bytes = bch_print_btree_node(i->c, b, i->buf,
+ sizeof(i->buf));
+ err = flush_buf(i);
if (err)
break;
}
diff --git a/libbcache/dirent.c b/libbcache/dirent.c
index 920ad2f..d97c3b2 100644
--- a/libbcache/dirent.c
+++ b/libbcache/dirent.c
@@ -10,7 +10,7 @@
#include <linux/dcache.h>
-static unsigned dirent_name_bytes(struct bkey_s_c_dirent d)
+unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent d)
{
unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent);
@@ -61,7 +61,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
{
struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
- struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d));
+ struct qstr name = QSTR_INIT(d.v->d_name, bch_dirent_name_bytes(d));
return bch_dirent_hash(info, &name);
}
@@ -69,7 +69,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k)
static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
- int len = dirent_name_bytes(l);
+ int len = bch_dirent_name_bytes(l);
const struct qstr *r = _r;
return len - r->len ?: memcmp(l.v->d_name, r->name, len);
@@ -79,8 +79,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
{
struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l);
struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r);
- int l_len = dirent_name_bytes(l);
- int r_len = dirent_name_bytes(r);
+ int l_len = bch_dirent_name_bytes(l);
+ int r_len = bch_dirent_name_bytes(r);
return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len);
}
@@ -125,7 +125,7 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf,
if (size) {
unsigned n = min_t(unsigned, size,
- dirent_name_bytes(d));
+ bch_dirent_name_bytes(d));
memcpy(buf, d.v->d_name, n);
buf[size - 1] = '\0';
buf += n;
@@ -167,15 +167,16 @@ static struct bkey_i_dirent *dirent_create_key(u8 type,
bkey_val_bytes(&dirent->k) -
(sizeof(struct bch_dirent) + name->len));
- EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
+ EBUG_ON(bch_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len);
return dirent;
}
-int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type,
- const struct qstr *name, u64 dst_inum)
+int bch_dirent_create(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
+ u8 type, const struct qstr *name, u64 dst_inum,
+ u64 *journal_seq, int flags)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
struct bkey_i_dirent *dirent;
int ret;
@@ -183,9 +184,8 @@ int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type,
if (!dirent)
return -ENOMEM;
- ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &ei->journal_seq,
- &dirent->k_i, BCH_HASH_SET_MUST_CREATE);
+ ret = bch_hash_set(dirent_hash_desc, hash_info, c, dir_inum,
+ journal_seq, &dirent->k_i, flags);
kfree(dirent);
return ret;
@@ -346,26 +346,25 @@ err:
return ret;
}
-int bch_dirent_delete(struct cache_set *c, struct inode *dir,
- const struct qstr *name)
+int bch_dirent_delete(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
+ const struct qstr *name,
+ u64 *journal_seq)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
-
- return bch_hash_delete(dirent_hash_desc, &ei->str_hash,
- c, ei->vfs_inode.i_ino,
- &ei->journal_seq, name);
+ return bch_hash_delete(dirent_hash_desc, hash_info,
+ c, dir_inum, journal_seq, name);
}
-u64 bch_dirent_lookup(struct cache_set *c, struct inode *dir,
+u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum,
+ const struct bch_hash_info *hash_info,
const struct qstr *name)
{
- struct bch_inode_info *ei = to_bch_ei(dir);
struct btree_iter iter;
struct bkey_s_c k;
u64 inum;
- k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c,
- ei->vfs_inode.i_ino, &iter, name);
+ k = bch_hash_lookup(dirent_hash_desc, hash_info, c,
+ dir_inum, &iter, name);
if (IS_ERR(k.k)) {
bch_btree_iter_unlock(&iter);
return 0;
@@ -428,7 +427,7 @@ int bch_readdir(struct cache_set *c, struct file *file,
if (k.k->p.inode > inode->i_ino)
break;
- len = dirent_name_bytes(dirent);
+ len = bch_dirent_name_bytes(dirent);
pr_debug("emitting %s", dirent.v->d_name);
diff --git a/libbcache/dirent.h b/libbcache/dirent.h
index e18089b..cc67d55 100644
--- a/libbcache/dirent.h
+++ b/libbcache/dirent.h
@@ -7,10 +7,13 @@ struct qstr;
struct file;
struct dir_context;
struct cache_set;
+struct bch_hash_info;
-int bch_dirent_create(struct cache_set *c, struct inode *, u8,
- const struct qstr *, u64);
-int bch_dirent_delete(struct cache_set *c, struct inode *, const struct qstr *);
+unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent);
+int bch_dirent_create(struct cache_set *c, u64, const struct bch_hash_info *,
+ u8, const struct qstr *, u64, u64 *, int);
+int bch_dirent_delete(struct cache_set *, u64, const struct bch_hash_info *,
+ const struct qstr *, u64 *);
enum bch_rename_mode {
BCH_RENAME,
@@ -23,8 +26,9 @@ int bch_dirent_rename(struct cache_set *,
struct inode *, const struct qstr *,
u64 *, enum bch_rename_mode);
-u64 bch_dirent_lookup(struct cache_set *c, struct inode *,
+u64 bch_dirent_lookup(struct cache_set *, u64, const struct bch_hash_info *,
const struct qstr *);
+
int bch_empty_dir(struct cache_set *, u64);
int bch_readdir(struct cache_set *, struct file *, struct dir_context *);
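
The dirent entry points now take a raw inode number plus a bch_hash_info instead of a VFS struct inode, so the new fsck passes in fs-gc.c can look up, create, and delete dirents without ever constructing a VFS inode, while fs.c derives the same arguments from its bch_inode_info. A minimal mirror of that decoupling, with invented types for illustration:

    #include <stdio.h>

    struct hash_info { unsigned long seed; };

    /* core layer: keyed by plain ids, usable with or without a VFS object */
    static int dirent_create(unsigned long dir_inum,
                             const struct hash_info *info,
                             const char *name)
    {
        printf("create '%s' in dir %lu (seed %lx)\n", name, dir_inum, info->seed);
        return 0;
    }

    /* VFS layer: unwraps its inode and forwards */
    struct vfs_inode { unsigned long ino; struct hash_info str_hash; };

    static int vfs_dirent_create(struct vfs_inode *dir, const char *name)
    {
        return dirent_create(dir->ino, &dir->str_hash, name);
    }

    int main(void)
    {
        struct vfs_inode dir = { 4096, { 0xdeadbeef } };

        vfs_dirent_create(&dir, "file.txt");        /* VFS path  */
        dirent_create(4096, &dir.str_hash, "lost"); /* fsck path */
        return 0;
    }
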
diff --git a/libbcache/error.h b/libbcache/error.h
index 9eb9335..33a28c4 100644
--- a/libbcache/error.h
+++ b/libbcache/error.h
@@ -101,38 +101,51 @@ enum {
BCH_FSCK_UNKNOWN_VERSION = 4,
};
-#define unfixable_fsck_err(c, msg, ...) \
-do { \
- bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__); \
- ret = BCH_FSCK_REPAIR_UNIMPLEMENTED; \
- goto fsck_err; \
-} while (0)
+/* These macros return true if error should be fixed: */
-#define unfixable_fsck_err_on(cond, c, ...) \
-do { \
- if (cond) \
- unfixable_fsck_err(c, __VA_ARGS__); \
-} while (0)
+/* XXX: mark in superblock that filesystem contains errors, if we ignore: */
-#define fsck_err(c, msg, ...) \
-do { \
- if (!(c)->opts.fix_errors) { \
- bch_err(c, msg, ##__VA_ARGS__); \
+#ifndef __fsck_err
+#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \
+({ \
+ bool _fix = false; \
+ \
+ if (_can_fix && (c)->opts.fix_errors) { \
+ bch_err(c, msg ", fixing", ##__VA_ARGS__); \
+ set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \
+ _fix = true; \
+ } else if (_can_ignore && \
+ (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \
+ bch_err(c, msg " (ignoring)", ##__VA_ARGS__); \
+ } else { \
+ bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \
ret = BCH_FSCK_ERRORS_NOT_FIXED; \
goto fsck_err; \
} \
- set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \
- bch_err(c, msg ", fixing", ##__VA_ARGS__); \
-} while (0)
-
-#define fsck_err_on(cond, c, ...) \
-({ \
- bool _ret = (cond); \
\
- if (_ret) \
- fsck_err(c, __VA_ARGS__); \
- _ret; \
+ BUG_ON(!_fix && !_can_ignore); \
+ _fix; \
})
+#endif
+
+#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \
+ ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \
+ _nofix_msg, ##__VA_ARGS__) : false)
+
+#define unfixable_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__)
+
+#define need_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__)
+
+#define mustfix_fsck_err(c, ...) \
+ __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__)
+
+#define mustfix_fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__)
+
+#define fsck_err_on(cond, c, ...) \
+ __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__)
/*
* Fatal errors: these don't indicate a bug, but we can't continue running in RW
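
The rewrite turns every fsck check into an expression answering "should this be repaired?": with fix_errors set, the problem is logged as fixing and true is returned; failing that, if the error is ignorable and errors=continue, it is logged and skipped; in all remaining cases ret is set and control jumps to the caller's fsck_err label. A standalone mirror of the fsck_err_on() flavor (can fix, can ignore), using the same GNU statement-expression and goto-into-caller conventions as the original:

    #include <stdbool.h>
    #include <stdio.h>

    struct opts { bool fix_errors; bool errors_continue; };

    #define demo_fsck_err_on(cond, o, msg) ({                       \
        bool _fix = false;                                          \
        if (cond) {                                                 \
            if ((o)->fix_errors) {                                  \
                puts(msg ", fixing");                               \
                _fix = true;                                        \
            } else if ((o)->errors_continue) {                      \
                puts(msg " (ignoring)");                            \
            } else {                                                \
                puts(msg " (not fixing)");                          \
                ret = -1;                                           \
                goto fsck_err;                                      \
            }                                                       \
        }                                                           \
        _fix;                                                       \
    })

    static int check_inode(const struct opts *o, int nlink)
    {
        int ret = 0;

        if (demo_fsck_err_on(nlink < 0, o, "negative nlink"))
            nlink = 0; /* the repair itself */
        printf("nlink=%d\n", nlink);
    fsck_err:
        return ret;
    }

    int main(void)
    {
        struct opts fix = { true, false }, strict = { false, false };

        check_inode(&fix, -1);  /* logs "negative nlink, fixing", prints nlink=0 */
        return check_inode(&strict, -1) ? 1 : 0; /* jumps to fsck_err */
    }
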
diff --git a/libbcache/extents.c b/libbcache/extents.c
index 45fa220..c026d59 100644
--- a/libbcache/extents.c
+++ b/libbcache/extents.c
@@ -108,15 +108,16 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst,
/* Common among btree and extent ptrs */
-bool bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
+const struct bch_extent_ptr *
+bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
{
const struct bch_extent_ptr *ptr;
extent_for_each_ptr(e, ptr)
if (ptr->dev == dev)
- return true;
+ return ptr;
- return false;
+ return NULL;
}
unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e,
diff --git a/libbcache/extents.h b/libbcache/extents.h
index 2dc6446..e1cb47a 100644
--- a/libbcache/extents.h
+++ b/libbcache/extents.h
@@ -1,15 +1,15 @@
#ifndef _BCACHE_EXTENTS_H
#define _BCACHE_EXTENTS_H
+#include "bcache.h"
#include "bkey.h"
#include <linux/bcache.h>
-struct bch_replace_info;
-union bch_extent_crc;
-struct btree_iter;
+struct btree_node_iter;
struct btree_insert;
struct btree_insert_entry;
+struct extent_insert_hook;
struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *,
struct btree *,
@@ -485,7 +485,8 @@ static inline void bch_extent_drop_ptr(struct bkey_s_extent e,
bch_extent_drop_redundant_crcs(e);
}
-bool bch_extent_has_device(struct bkey_s_c_extent, unsigned);
+const struct bch_extent_ptr *
+bch_extent_has_device(struct bkey_s_c_extent, unsigned);
bool bch_cut_front(struct bpos, struct bkey_i *);
bool bch_cut_back(struct bpos, struct bkey *);
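
Returning the matching pointer instead of a bool lets callers test for a device and use the result in one step, which is exactly how the movinggc.c hunk later in this diff collapses its open-coded loop. The pattern in miniature:

    #include <stddef.h>
    #include <stdio.h>

    struct ptr { unsigned dev; unsigned offset; };

    static const struct ptr *has_device(const struct ptr *ptrs, size_t n,
                                        unsigned dev)
    {
        for (size_t i = 0; i < n; i++)
            if (ptrs[i].dev == dev)
                return &ptrs[i]; /* the pointer doubles as the "found" answer */
        return NULL;
    }

    int main(void)
    {
        struct ptr e[] = { { 0, 100 }, { 2, 300 } };
        const struct ptr *p;

        if ((p = has_device(e, 2, 2))) /* test and capture in one step */
            printf("dev 2 at offset %u\n", p->offset);
        return 0;
    }
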
diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c
index bd2a867..1dec230 100644
--- a/libbcache/fs-gc.c
+++ b/libbcache/fs-gc.c
@@ -11,6 +11,529 @@
#include <linux/generic-radix-tree.h>
+#define QSTR(n) { { { .len = strlen(n) } }, .name = n }
+
+static int remove_dirent(struct cache_set *c, struct btree_iter *iter,
+ struct bkey_s_c_dirent dirent)
+{
+ struct qstr name;
+ struct bkey_i_inode dir_inode;
+ struct bch_hash_info dir_hash_info;
+ u64 dir_inum = dirent.k->p.inode;
+ int ret;
+ char *buf;
+
+ name.len = bch_dirent_name_bytes(dirent);
+ buf = kmalloc(name.len + 1, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ memcpy(buf, dirent.v->d_name, name.len);
+ buf[name.len] = '\0';
+ name.name = buf;
+
+ /* Unlock iter so we don't deadlock, after copying name: */
+ bch_btree_iter_unlock(iter);
+
+ ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode);
+ if (ret)
+ goto err;
+
+ dir_hash_info = bch_hash_info_init(&dir_inode.v);
+
+ ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL);
+err:
+ kfree(buf);
+ return ret;
+}
+
+static int reattach_inode(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ u64 inum)
+{
+ struct bch_hash_info lostfound_hash_info =
+ bch_hash_info_init(&lostfound_inode->v);
+ char name_buf[20];
+ struct qstr name;
+ int ret;
+
+ snprintf(name_buf, sizeof(name_buf), "%llu", inum);
+ name = (struct qstr) QSTR(name_buf);
+
+ le32_add_cpu(&lostfound_inode->v.i_nlink, 1);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i,
+ NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+ return bch_dirent_create(c, lostfound_inode->k.p.inode,
+ &lostfound_hash_info,
+ DT_DIR, &name, inum, NULL, 0);
+}
+
+struct inode_walker {
+ bool first_this_inode;
+ bool have_inode;
+ u16 i_mode;
+ u64 i_size;
+ u64 cur_inum;
+ struct bkey_i_inode inode;
+};
+
+static struct inode_walker inode_walker_init(void)
+{
+ return (struct inode_walker) {
+ .cur_inum = -1,
+ .have_inode = false,
+ };
+}
+
+static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum)
+{
+ w->first_this_inode = inum != w->cur_inum;
+ w->cur_inum = inum;
+
+ if (w->first_this_inode) {
+ int ret = bch_inode_find_by_inum(c, inum, &w->inode);
+
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ w->have_inode = !ret;
+
+ if (w->have_inode) {
+ w->i_mode = le16_to_cpu(w->inode.v.i_mode);
+ w->i_size = le64_to_cpu(w->inode.v.i_size);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Walk extents: verify that extents have a corresponding S_ISREG inode, and
+ * that i_size and i_sectors are consistent
+ */
+noinline_for_stack
+static int check_extents(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ u64 i_sectors;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ if (k.k->type == KEY_TYPE_DISCARD)
+ continue;
+
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "extent type %u for missing inode %llu",
+ k.k->type, k.k->p.inode);
+
+ unfixable_fsck_err_on(w.first_this_inode && w.have_inode &&
+ le64_to_cpu(w.inode.v.i_sectors) !=
+ (i_sectors = bch_count_inode_sectors(c, w.cur_inum)),
+ c, "i_sectors wrong: got %llu, should be %llu",
+ le64_to_cpu(w.inode.v.i_sectors), i_sectors);
+
+ unfixable_fsck_err_on(w.have_inode &&
+ !S_ISREG(w.i_mode) && !S_ISLNK(w.i_mode), c,
+ "extent type %u for non regular file, inode %llu mode %o",
+ k.k->type, k.k->p.inode, w.i_mode);
+
+ unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
+ k.k->p.offset > round_up(w.i_size, PAGE_SIZE) >> 9, c,
+ "extent type %u offset %llu past end of inode %llu, i_size %llu",
+ k.k->type, k.k->p.offset, k.k->p.inode, w.i_size);
+ }
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/*
+ * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
+ * validate d_type
+ */
+noinline_for_stack
+static int check_dirents(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ struct bkey_s_c_dirent d;
+ struct bkey_i_inode target;
+ bool have_target;
+ u64 d_inum;
+
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "dirent in nonexisting directory %llu",
+ k.k->p.inode);
+
+ unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c,
+ "dirent in non directory inode %llu, type %u",
+ k.k->p.inode, mode_to_type(w.i_mode));
+
+ if (k.k->type != BCH_DIRENT)
+ continue;
+
+ d = bkey_s_c_to_dirent(k);
+ d_inum = le64_to_cpu(d.v->d_inum);
+
+ if (fsck_err_on(d_inum == d.k->p.inode, c,
+ "dirent points to own directory")) {
+ ret = remove_dirent(c, &iter, d);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ ret = bch_inode_find_by_inum(c, d_inum, &target);
+ if (ret && ret != -ENOENT)
+ break;
+
+ have_target = !ret;
+ ret = 0;
+
+ if (fsck_err_on(!have_target, c,
+ "dirent points to missing inode %llu, type %u filename %s",
+ d_inum, d.v->d_type, d.v->d_name)) {
+ ret = remove_dirent(c, &iter, d);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ if (fsck_err_on(have_target &&
+ d.v->d_type !=
+ mode_to_type(le16_to_cpu(target.v.i_mode)), c,
+ "incorrect d_type: got %u should be %u, filename %s",
+ d.v->d_type,
+ mode_to_type(le16_to_cpu(target.v.i_mode)),
+ d.v->d_name)) {
+ struct bkey_i_dirent *n;
+
+ n = kmalloc(bkey_bytes(d.k), GFP_KERNEL);
+ if (!n) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ bkey_reassemble(&n->k_i, d.s_c);
+ n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode));
+
+ ret = bch_btree_insert_at(c, NULL, NULL, NULL,
+ BTREE_INSERT_NOFAIL,
+ BTREE_INSERT_ENTRY(&iter, &n->k_i));
+ kfree(n);
+ if (ret)
+ goto err;
+
+ }
+ }
+err:
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/*
+ * Walk xattrs: verify that they all have a corresponding inode
+ */
+noinline_for_stack
+static int check_xattrs(struct cache_set *c)
+{
+ struct inode_walker w = inode_walker_init();
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret = 0;
+
+ for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
+ POS(BCACHE_ROOT_INO, 0), k) {
+ ret = walk_inode(c, &w, k.k->p.inode);
+ if (ret)
+ break;
+
+ unfixable_fsck_err_on(!w.have_inode, c,
+ "xattr for missing inode %llu",
+ k.k->p.inode);
+ }
+fsck_err:
+ return bch_btree_iter_unlock(&iter) ?: ret;
+}
+
+/* Get root directory, create if it doesn't exist: */
+static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode)
+{
+ int ret;
+
+ ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (fsck_err_on(ret, c, "root directory missing"))
+ goto create_root;
+
+ if (fsck_err_on(!S_ISDIR(le16_to_cpu(root_inode->v.i_mode)), c,
+ "root inode not a directory"))
+ goto create_root;
+
+ return 0;
+fsck_err:
+ return ret;
+create_root:
+ bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
+ root_inode->k.p.inode = BCACHE_ROOT_INO;
+
+ return bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ NULL, NULL, NULL, 0);
+}
+
+/* Get lost+found, create if it doesn't exist: */
+static int check_lostfound(struct cache_set *c,
+ struct bkey_i_inode *root_inode,
+ struct bkey_i_inode *lostfound_inode)
+{
+ struct qstr lostfound = QSTR("lost+found");
+ struct bch_hash_info root_hash_info = bch_hash_info_init(&root_inode->v);
+ u64 inum;
+ int ret;
+
+ inum = bch_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info,
+ &lostfound);
+ if (!inum) {
+ bch_notice(c, "creating lost+found");
+ goto create_lostfound;
+ }
+
+ ret = bch_inode_find_by_inum(c, inum, lostfound_inode);
+ if (ret && ret != -ENOENT)
+ return ret;
+
+ if (fsck_err_on(ret, c, "lost+found missing"))
+ goto create_lostfound;
+
+ if (fsck_err_on(!S_ISDIR(le16_to_cpu(lostfound_inode->v.i_mode)), c,
+ "lost+found inode not a directory"))
+ goto create_lostfound;
+
+ return 0;
+fsck_err:
+ return ret;
+create_lostfound:
+ le32_add_cpu(&root_inode->v.i_nlink, 1);
+
+ ret = bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i,
+ NULL, NULL, NULL, 0);
+ if (ret)
+ return ret;
+
+ bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
+
+ ret = bch_inode_create(c, &lostfound_inode->k_i, BLOCKDEV_INODE_MAX, 0,
+ &c->unused_inode_hint);
+ if (ret)
+ return ret;
+
+ ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR,
+ &lostfound, lostfound_inode->k.p.inode, NULL, 0);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+struct inode_bitmap {
+ unsigned long *bits;
+ size_t size;
+};
+
+static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr)
+{
+ return nr < b->size ? test_bit(nr, b->bits) : false;
+}
+
+static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr)
+{
+ if (nr >= b->size) {
+ size_t new_size = max(max(PAGE_SIZE * 8,
+ b->size * 2),
+ nr + 1);
+ void *n;
+
+ new_size = roundup_pow_of_two(new_size);
+ n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO);
+ if (!n)
+ return -ENOMEM;
+
+ b->bits = n;
+ b->size = new_size;
+ }
+
+ __set_bit(nr, b->bits);
+ return 0;
+}
+
+struct pathbuf {
+ size_t nr;
+ size_t size;
+
+ struct pathbuf_entry {
+ u64 inum;
+ u64 offset;
+ } *entries;
+};
+
+static int path_down(struct pathbuf *p, u64 inum)
+{
+ if (p->nr == p->size) {
+ size_t new_size = max(256UL, p->size * 2);
+ void *n = krealloc(p->entries,
+ new_size * sizeof(p->entries[0]),
+ GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ p->entries = n;
+ p->size = new_size;
+ };
+
+ p->entries[p->nr++] = (struct pathbuf_entry) {
+ .inum = inum,
+ .offset = 0,
+ };
+ return 0;
+}
+
+noinline_for_stack
+static int check_directory_structure(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode)
+{
+ struct inode_bitmap dirs_done = { NULL, 0 };
+ struct pathbuf path = { 0, 0, NULL };
+ struct pathbuf_entry *e;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct bkey_s_c_dirent dirent;
+ bool had_unreachable;
+ u64 d_inum;
+ int ret = 0;
+
+ /* DFS: */
+restart_dfs:
+ ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO);
+ if (ret)
+ goto err;
+
+ ret = path_down(&path, BCACHE_ROOT_INO);
+ if (ret)
+ return ret;
+
+ while (path.nr) {
+next:
+ e = &path.entries[path.nr - 1];
+
+ if (e->offset == U64_MAX)
+ goto up;
+
+ for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
+ POS(e->inum, e->offset + 1), k) {
+ if (k.k->p.inode != e->inum)
+ break;
+
+ e->offset = k.k->p.offset;
+
+ if (k.k->type != BCH_DIRENT)
+ continue;
+
+ dirent = bkey_s_c_to_dirent(k);
+
+ if (dirent.v->d_type != DT_DIR)
+ continue;
+
+ d_inum = le64_to_cpu(dirent.v->d_inum);
+
+ if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c,
+ "directory with multiple hardlinks")) {
+ ret = remove_dirent(c, &iter, dirent);
+ if (ret)
+ goto err;
+ continue;
+ }
+
+ ret = inode_bitmap_set(&dirs_done, d_inum);
+ if (ret)
+ goto err;
+
+ ret = path_down(&path, d_inum);
+ if (ret)
+ goto err;
+
+ bch_btree_iter_unlock(&iter);
+ goto next;
+ }
+ ret = bch_btree_iter_unlock(&iter);
+ if (ret)
+ goto err;
+up:
+ path.nr--;
+ }
+
+ had_unreachable = false;
+
+ for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) {
+ if (k.k->type != BCH_INODE_FS ||
+ !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode)))
+ continue;
+
+ if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c,
+ "unreachable directory found (inum %llu)",
+ k.k->p.inode)) {
+ bch_btree_iter_unlock(&iter);
+
+ ret = reattach_inode(c, lostfound_inode, k.k->p.inode);
+ if (ret)
+ goto err;
+
+ had_unreachable = true;
+ }
+ }
+ ret = bch_btree_iter_unlock(&iter);
+ if (ret)
+ goto err;
+
+ if (had_unreachable) {
+ bch_info(c, "reattached unreachable directories, restarting pass to check for loops");
+ kfree(dirs_done.bits);
+ kfree(path.entries);
+ memset(&dirs_done, 0, sizeof(dirs_done));
+ memset(&path, 0, sizeof(path));
+ goto restart_dfs;
+ }
+
+out:
+ kfree(dirs_done.bits);
+ kfree(path.entries);
+ return ret;
+err:
+fsck_err:
+ ret = bch_btree_iter_unlock(&iter) ?: ret;
+ goto out;
+}
+
struct nlink {
u32 count;
u32 dir_count;
@@ -40,11 +563,6 @@ static void inc_link(struct cache_set *c, struct nlinks *links,
link->count++;
}
-/*
- * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents
- * are reachable
- */
-
noinline_for_stack
static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links,
u64 range_start, u64 *range_end)
@@ -99,7 +617,9 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
return bch_btree_iter_unlock(&iter) ?: sectors;
}
-static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
+static int bch_gc_do_inode(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
{
u16 i_mode = le16_to_cpu(inode.v->i_mode);
@@ -115,14 +635,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
inode.k->p.inode, i_nlink,
link.count, mode_to_type(i_mode));
+ /* These should have been caught/fixed by earlier passes: */
if (S_ISDIR(i_mode)) {
- unfixable_fsck_err_on(link.count > 1, c,
+ need_fsck_err_on(link.count > 1, c,
"directory %llu with multiple hardlinks: %u",
inode.k->p.inode, link.count);
real_i_nlink = link.count * 2 + link.dir_count;
} else {
- unfixable_fsck_err_on(link.dir_count, c,
+ need_fsck_err_on(link.dir_count, c,
"found dirents for non directory %llu",
inode.k->p.inode);
@@ -135,11 +656,16 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
"but found orphaned inode %llu",
inode.k->p.inode);
- unfixable_fsck_err_on(S_ISDIR(i_mode) &&
- bch_empty_dir(c, inode.k->p.inode), c,
- "non empty directory with link count 0, "
- "inode nlink %u, dir links found %u",
- i_nlink, link.dir_count);
+ if (fsck_err_on(S_ISDIR(i_mode) &&
+ bch_empty_dir(c, inode.k->p.inode), c,
+ "non empty directory with link count 0, "
+ "inode nlink %u, dir links found %u",
+ i_nlink, link.dir_count)) {
+ ret = reattach_inode(c, lostfound_inode,
+ inode.k->p.inode);
+ if (ret)
+ return ret;
+ }
bch_verbose(c, "deleting inode %llu", inode.k->p.inode);
@@ -235,7 +761,9 @@ fsck_err:
}
noinline_for_stack
-static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links,
+static int bch_gc_walk_inodes(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode,
+ struct nlinks *links,
u64 range_start, u64 range_end)
{
struct btree_iter iter;
@@ -257,7 +785,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
nlinks_pos = range_start + nlinks_iter.pos;
if (iter.pos.inode > nlinks_pos) {
- unfixable_fsck_err_on(link && link->count, c,
+ /* Should have been caught by dirents pass: */
+ need_fsck_err_on(link && link->count, c,
"missing inode %llu (nlink %u)",
nlinks_pos, link->count);
genradix_iter_advance(&nlinks_iter, links);
@@ -274,9 +803,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
*/
bch_btree_iter_unlock(&iter);
- ret = bch_gc_do_inode(c, &iter,
- bkey_s_c_to_inode(k),
- *link);
+ ret = bch_gc_do_inode(c, lostfound_inode, &iter,
+ bkey_s_c_to_inode(k), *link);
if (ret == -EINTR)
continue;
if (ret)
@@ -285,7 +813,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links);
if (link->count)
atomic_long_inc(&c->nr_inodes);
} else {
- unfixable_fsck_err_on(link->count, c,
+ /* Should have been caught by dirents pass: */
+ need_fsck_err_on(link->count, c,
"missing inode %llu (nlink %u)",
nlinks_pos, link->count);
}
@@ -304,7 +833,9 @@ fsck_err:
return ret ?: ret2;
}
-int bch_gc_inode_nlinks(struct cache_set *c)
+noinline_for_stack
+static int check_inode_nlinks(struct cache_set *c,
+ struct bkey_i_inode *lostfound_inode)
{
struct nlinks links;
u64 this_iter_range_start, next_iter_range_start = 0;
@@ -322,7 +853,7 @@ int bch_gc_inode_nlinks(struct cache_set *c)
if (ret)
break;
- ret = bch_gc_walk_inodes(c, &links,
+ ret = bch_gc_walk_inodes(c, lostfound_inode, &links,
this_iter_range_start,
next_iter_range_start);
if (ret)
@@ -336,140 +867,45 @@ int bch_gc_inode_nlinks(struct cache_set *c)
return ret;
}
-static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum,
- struct bkey_i_inode *inode,
- bool *first_this_inode, bool *have_inode,
- u64 *i_size, u16 *i_mode)
-{
- *first_this_inode = inum != *cur_inum;
- *cur_inum = inum;
-
- if (*first_this_inode) {
- *have_inode = !bch_inode_find_by_inum(c, inum, inode);
-
- if (*have_inode) {
- *i_mode = le16_to_cpu(inode->v.i_mode);
- *i_size = le64_to_cpu(inode->v.i_size);
- }
- }
-}
-
/*
* Checks for inconsistencies that shouldn't happen, unless we have a bug.
* Doesn't fix them yet, mainly because they haven't yet been observed:
*/
-int bch_fsck(struct cache_set *c)
+int bch_fsck(struct cache_set *c, bool full_fsck)
{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_i_inode inode;
- bool first_this_inode, have_inode;
- u64 cur_inum, i_sectors;
- u64 i_size = 0;
- u16 i_mode = 0;
- int ret = 0;
-
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_EXTENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
- if (k.k->type == KEY_TYPE_DISCARD)
- continue;
-
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
-
- unfixable_fsck_err_on(!have_inode, c,
- "extent type %u for missing inode %llu",
- k.k->type, k.k->p.inode);
-
- unfixable_fsck_err_on(first_this_inode && have_inode &&
- le64_to_cpu(inode.v.i_sectors) !=
- (i_sectors = bch_count_inode_sectors(c, cur_inum)),
- c, "i_sectors wrong: got %llu, should be %llu",
- le64_to_cpu(inode.v.i_sectors), i_sectors);
-
- unfixable_fsck_err_on(have_inode &&
- !S_ISREG(i_mode) && !S_ISLNK(i_mode), c,
- "extent type %u for non regular file, inode %llu mode %o",
- k.k->type, k.k->p.inode, i_mode);
+ struct bkey_i_inode root_inode, lostfound_inode;
+ int ret;
- unfixable_fsck_err_on(k.k->type != BCH_RESERVATION &&
- k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c,
- "extent type %u offset %llu past end of inode %llu, i_size %llu",
- k.k->type, k.k->p.offset, k.k->p.inode, i_size);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_root(c, &root_inode);
if (ret)
return ret;
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_DIRENTS,
- POS(BCACHE_ROOT_INO, 0), k) {
- struct bkey_s_c_dirent d;
- struct bkey_i_inode target;
- bool have_target;
- u64 d_inum;
-
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
-
- unfixable_fsck_err_on(!have_inode, c,
- "dirent in nonexisting directory %llu",
- k.k->p.inode);
-
- unfixable_fsck_err_on(!S_ISDIR(i_mode), c,
- "dirent in non directory inode %llu, type %u",
- k.k->p.inode, mode_to_type(i_mode));
-
- if (k.k->type != BCH_DIRENT)
- continue;
-
- d = bkey_s_c_to_dirent(k);
- d_inum = le64_to_cpu(d.v->d_inum);
-
- unfixable_fsck_err_on(d_inum == d.k->p.inode, c,
- "dirent points to own directory");
+ ret = check_lostfound(c, &root_inode, &lostfound_inode);
+ if (ret)
+ return ret;
- have_target = !bch_inode_find_by_inum(c, d_inum, &target);
+ if (!full_fsck)
+ goto check_nlinks;
- unfixable_fsck_err_on(!have_target, c,
- "dirent points to missing inode %llu, type %u filename %s",
- d_inum, d.v->d_type, d.v->d_name);
+ ret = check_extents(c);
+ if (ret)
+ return ret;
- unfixable_fsck_err_on(have_target &&
- d.v->d_type !=
- mode_to_type(le16_to_cpu(target.v.i_mode)), c,
- "incorrect d_type: got %u should be %u, filename %s",
- d.v->d_type,
- mode_to_type(le16_to_cpu(target.v.i_mode)),
- d.v->d_name);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_dirents(c);
if (ret)
return ret;
- cur_inum = -1;
- have_inode = false;
- for_each_btree_key(&iter, c, BTREE_ID_XATTRS,
- POS(BCACHE_ROOT_INO, 0), k) {
- next_inode(c, k.k->p.inode, &cur_inum, &inode,
- &first_this_inode, &have_inode,
- &i_size, &i_mode);
+ ret = check_xattrs(c);
+ if (ret)
+ return ret;
- unfixable_fsck_err_on(!have_inode, c,
- "xattr for missing inode %llu",
- k.k->p.inode);
- }
- ret = bch_btree_iter_unlock(&iter);
+ ret = check_directory_structure(c, &lostfound_inode);
+ if (ret)
+ return ret;
+check_nlinks:
+ ret = check_inode_nlinks(c, &lostfound_inode);
if (ret)
return ret;
return 0;
-fsck_err:
- bch_btree_iter_unlock(&iter);
- return ret;
}
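
check_directory_structure() tracks visited directories in an inode_bitmap that grows on demand: an out-of-range set at least doubles the size (starting from one page worth of bits, rounded up to a power of two) and zeroes the new tail via krealloc with __GFP_ZERO. A userspace mirror of that growth logic, with realloc plus an explicit memset standing in for the kernel allocator:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct inode_bitmap { unsigned long *bits; size_t size; /* in bits */ };

    static bool bitmap_test(struct inode_bitmap *b, size_t nr)
    {
        return nr < b->size &&
               (b->bits[nr / (8 * sizeof(long))] >> (nr % (8 * sizeof(long)))) & 1;
    }

    static int bitmap_set(struct inode_bitmap *b, size_t nr)
    {
        if (nr >= b->size) {
            size_t new_size = b->size ? b->size * 2 : 4096 * 8;

            while (new_size <= nr) /* cover nr, keep power-of-two sizing */
                new_size *= 2;

            unsigned long *n = realloc(b->bits, new_size / 8);
            if (!n)
                return -1;
            memset((char *) n + b->size / 8, 0, (new_size - b->size) / 8);
            b->bits = n;
            b->size = new_size;
        }
        b->bits[nr / (8 * sizeof(long))] |= 1UL << (nr % (8 * sizeof(long)));
        return 0;
    }

    int main(void)
    {
        struct inode_bitmap b = { NULL, 0 };

        bitmap_set(&b, 100000); /* forces two doublings past the initial page */
        printf("%d %d\n", bitmap_test(&b, 100000), bitmap_test(&b, 5)); /* 1 0 */
        free(b.bits);
        return 0;
    }
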
diff --git a/libbcache/fs-gc.h b/libbcache/fs-gc.h
index c44086c..ca6571a 100644
--- a/libbcache/fs-gc.h
+++ b/libbcache/fs-gc.h
@@ -2,7 +2,6 @@
#define _BCACHE_FS_GC_H
s64 bch_count_inode_sectors(struct cache_set *, u64);
-int bch_gc_inode_nlinks(struct cache_set *);
-int bch_fsck(struct cache_set *);
+int bch_fsck(struct cache_set *, bool);
#endif /* _BCACHE_FS_GC_H */
diff --git a/libbcache/fs.c b/libbcache/fs.c
index 1f01e48..884a950 100644
--- a/libbcache/fs.c
+++ b/libbcache/fs.c
@@ -26,7 +26,7 @@
static struct kmem_cache *bch_inode_cache;
-static void bch_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
+static void bch_vfs_inode_init(struct bch_inode_info *, struct bkey_s_c_inode);
/*
* I_SIZE_DIRTY requires special handling:
@@ -175,7 +175,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum)
}
ei = to_bch_ei(inode);
- bch_inode_init(ei, bkey_s_c_to_inode(k));
+ bch_vfs_inode_init(ei, bkey_s_c_to_inode(k));
ei->journal_seq = bch_inode_journal_seq(&c->journal, inum);
@@ -193,10 +193,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
struct inode *inode;
struct posix_acl *default_acl = NULL, *acl = NULL;
struct bch_inode_info *ei;
- struct bch_inode *bi;
struct bkey_i_inode bkey_inode;
- struct timespec ts = CURRENT_TIME;
- s64 now = timespec_to_ns(&ts);
int ret;
inode = new_inode(parent->i_sb);
@@ -213,19 +210,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
ei = to_bch_ei(inode);
- bi = &bkey_inode_init(&bkey_inode.k_i)->v;
- bi->i_uid = cpu_to_le32(i_uid_read(inode));
- bi->i_gid = cpu_to_le32(i_gid_read(inode));
-
- bi->i_mode = cpu_to_le16(inode->i_mode);
- bi->i_dev = cpu_to_le32(rdev);
- bi->i_atime = cpu_to_le64(now);
- bi->i_mtime = cpu_to_le64(now);
- bi->i_ctime = cpu_to_le64(now);
- bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1);
-
- get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
- SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
+ bch_inode_init(c, &bkey_inode, i_uid_read(inode),
+ i_gid_read(inode), inode->i_mode, rdev);
ret = bch_inode_create(c, &bkey_inode.k_i,
BLOCKDEV_INODE_MAX, 0,
@@ -239,7 +225,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c,
goto err;
}
- bch_inode_init(ei, inode_i_to_s_c(&bkey_inode));
+ bch_vfs_inode_init(ei, inode_i_to_s_c(&bkey_inode));
if (default_acl) {
ret = bch_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
@@ -270,9 +256,13 @@ static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir,
u8 type, const struct qstr *name,
struct inode *dst)
{
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
int ret;
- ret = bch_dirent_create(c, dir, type, name, dst->i_ino);
+ ret = bch_dirent_create(c, dir->i_ino, &dir_ei->str_hash,
+ type, name, dst->i_ino,
+ &dir_ei->journal_seq,
+ BCH_HASH_SET_MUST_CREATE);
if (unlikely(ret))
return ret;
@@ -317,10 +307,13 @@ static struct dentry *bch_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct cache_set *c = dir->i_sb->s_fs_info;
+ struct bch_inode_info *dir_ei = to_bch_ei(dir);
struct inode *inode = NULL;
u64 inum;
- inum = bch_dirent_lookup(c, dir, &dentry->d_name);
+ inum = bch_dirent_lookup(c, dir->i_ino,
+ &dir_ei->str_hash,
+ &dentry->d_name);
if (inum)
inode = bch_vfs_inode_get(dir->i_sb, inum);
@@ -374,7 +367,8 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry)
lockdep_assert_held(&inode->i_rwsem);
- ret = bch_dirent_delete(c, dir, &dentry->d_name);
+ ret = bch_dirent_delete(c, dir->i_ino, &dir_ei->str_hash,
+ &dentry->d_name, &dir_ei->journal_seq);
if (ret)
return ret;
@@ -1016,8 +1010,8 @@ static const struct address_space_operations bch_address_space_operations = {
.error_remove_page = generic_error_remove_page,
};
-static void bch_inode_init(struct bch_inode_info *ei,
- struct bkey_s_c_inode bkey_inode)
+static void bch_vfs_inode_init(struct bch_inode_info *ei,
+ struct bkey_s_c_inode bkey_inode)
{
struct inode *inode = &ei->vfs_inode;
const struct bch_inode *bi = bkey_inode.v;
@@ -1044,8 +1038,7 @@ static void bch_inode_init(struct bch_inode_info *ei,
inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime));
bch_inode_flags_to_vfs(inode);
- ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed);
- ei->str_hash.type = INODE_STR_HASH_TYPE(bi);
+ ei->str_hash = bch_hash_info_init(bi);
inode->i_mapping->a_ops = &bch_address_space_operations;
diff --git a/libbcache/inode.c b/libbcache/inode.c
index d36de43..200deb0 100644
--- a/libbcache/inode.c
+++ b/libbcache/inode.c
@@ -7,6 +7,8 @@
#include "io.h"
#include "keylist.h"
+#include <linux/random.h>
+
ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k)
{
if (k->p.offset)
@@ -105,6 +107,28 @@ const struct bkey_ops bch_bkey_inode_ops = {
.val_to_text = bch_inode_to_text,
};
+void bch_inode_init(struct cache_set *c, struct bkey_i_inode *inode,
+ uid_t uid, gid_t gid, umode_t mode, dev_t rdev)
+{
+ struct timespec ts = CURRENT_TIME;
+ s64 now = timespec_to_ns(&ts);
+ struct bch_inode *bi;
+
+ bi = &bkey_inode_init(&inode->k_i)->v;
+ bi->i_uid = cpu_to_le32(uid);
+ bi->i_gid = cpu_to_le32(gid);
+
+ bi->i_mode = cpu_to_le16(mode);
+ bi->i_dev = cpu_to_le32(rdev);
+ bi->i_atime = cpu_to_le64(now);
+ bi->i_mtime = cpu_to_le64(now);
+ bi->i_ctime = cpu_to_le64(now);
+ bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1);
+
+ get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed));
+ SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type);
+}
+
int bch_inode_create(struct cache_set *c, struct bkey_i *inode,
u64 min, u64 max, u64 *hint)
{
@@ -228,15 +252,14 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
{
struct btree_iter iter;
struct bkey_s_c k;
- int ret = -ENOENT;
for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES,
POS(inode_nr, 0), k) {
switch (k.k->type) {
case BCH_INODE_FS:
- ret = 0;
bkey_reassemble(&inode->k_i, k);
- break;
+ bch_btree_iter_unlock(&iter);
+ return 0;
default:
/* hole, not found */
break;
@@ -245,9 +268,8 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr,
break;
}
- bch_btree_iter_unlock(&iter);
- return ret;
+ return bch_btree_iter_unlock(&iter) ?: -ENOENT;
}
int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid,
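
The rewritten tail of bch_inode_find_by_inum() returns from the hit path immediately and otherwise lets any iterator-unlock error win over -ENOENT, using GCC's a ?: b extension (a if a is nonzero, else b, with a evaluated once). A tiny demonstration of that fallback idiom:

    #include <stdio.h>

    static int iter_unlock(int err) { return err; } /* stand-in */

    static int find(int found, int iter_err)
    {
        if (found)
            return 0;
        return iter_unlock(iter_err) ?: -2; /* -2 playing ENOENT */
    }

    int main(void)
    {
        printf("%d %d %d\n", find(1, 0), find(0, 0), find(0, -5));
        /* prints: 0 -2 -5 */
        return 0;
    }
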
diff --git a/libbcache/inode.h b/libbcache/inode.h
index d8b28c7..fa1a4cf 100644
--- a/libbcache/inode.h
+++ b/libbcache/inode.h
@@ -5,6 +5,8 @@ extern const struct bkey_ops bch_bkey_inode_ops;
ssize_t bch_inode_status(char *, size_t, const struct bkey *);
+void bch_inode_init(struct cache_set *, struct bkey_i_inode *,
+ uid_t, gid_t, umode_t, dev_t);
int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *);
int bch_inode_truncate(struct cache_set *, u64, u64,
struct extent_insert_hook *, u64 *);
diff --git a/libbcache/io.c b/libbcache/io.c
index 7219b65..4112ea5 100644
--- a/libbcache/io.c
+++ b/libbcache/io.c
@@ -140,6 +140,8 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c,
struct bch_write_bio *n;
struct cache *ca;
+ BUG_ON(c->opts.nochanges);
+
wbio->split = false;
wbio->c = c;
@@ -738,7 +740,8 @@ void bch_write(struct closure *cl)
!(op->flags & BCH_WRITE_CACHED),
op->flags & BCH_WRITE_DISCARD);
- if (!percpu_ref_tryget(&c->writes)) {
+ if (c->opts.nochanges ||
+ !percpu_ref_tryget(&c->writes)) {
__bcache_io_error(c, "read only");
op->error = -EROFS;
bch_disk_reservation_put(c, &op->res);
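
These io.c hunks pin down the nochanges invariant this commit threads through the alloc, journal, superblock, and btree write paths: no data write may reach the replica submission path (the BUG_ON), and bch_write() fails up front with -EROFS as if the filesystem were read-only. A compact sketch of that guard-at-the-entry, assert-at-the-choke-point arrangement:

    #include <assert.h>
    #include <errno.h>
    #include <stdio.h>

    struct opts { int nochanges; };

    /* deep layer: must never be reached in nochanges mode */
    static void submit_write(const struct opts *o)
    {
        assert(!o->nochanges);
        puts("io submitted");
    }

    /* entry point: refuse politely instead */
    static int do_write(const struct opts *o)
    {
        if (o->nochanges)
            return -EROFS;
        submit_write(o);
        return 0;
    }

    int main(void)
    {
        struct opts ro = { 1 }, rw = { 0 };

        printf("ro: %d\n", do_write(&ro)); /* -EROFS, assert never trips */
        printf("rw: %d\n", do_write(&rw)); /* 0, after "io submitted"    */
        return 0;
    }
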
diff --git a/libbcache/journal.c b/libbcache/journal.c
index ffc9573..9e09b86 100644
--- a/libbcache/journal.c
+++ b/libbcache/journal.c
@@ -478,14 +478,14 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
char buf[160];
int ret = 0;
- if (fsck_err_on(!k->k.u64s, c,
+ if (mustfix_fsck_err_on(!k->k.u64s, c,
"invalid %s in journal: k->u64s 0", type)) {
entry->u64s = cpu_to_le16((u64 *) k - entry->_data);
journal_entry_null_range(jset_keys_next(entry), next);
return 0;
}
- if (fsck_err_on((void *) bkey_next(k) >
+ if (mustfix_fsck_err_on((void *) bkey_next(k) >
(void *) jset_keys_next(entry), c,
"invalid %s in journal: extends past end of journal entry",
type)) {
@@ -494,7 +494,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
return 0;
}
- if (fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
+ if (mustfix_fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c,
"invalid %s in journal: bad format %u",
type, k->k.format)) {
le16_add_cpu(&entry->u64s, -k->k.u64s);
@@ -510,7 +510,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j,
if (invalid) {
bch_bkey_val_to_text(c, key_type, buf, sizeof(buf),
bkey_i_to_s_c(k));
- fsck_err(c, "invalid %s in journal: %s", type, buf);
+ mustfix_fsck_err(c, "invalid %s in journal: %s", type, buf);
le16_add_cpu(&entry->u64s, -k->k.u64s);
memmove(k, bkey_next(k), next - (void *) bkey_next(k));
@@ -543,7 +543,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
return BCH_FSCK_UNKNOWN_VERSION;
}
- if (fsck_err_on(bytes > bucket_sectors_left << 9 ||
+ if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 ||
bytes > c->journal.entry_size_max, c,
"journal entry too big (%zu bytes), sector %lluu",
bytes, sector)) {
@@ -556,7 +556,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
got = le64_to_cpu(j->csum);
expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j));
- if (fsck_err_on(got != expect, c,
+ if (mustfix_fsck_err_on(got != expect, c,
"journal checksum bad (got %llu expect %llu), sector %lluu",
got, expect, sector)) {
/* XXX: retry IO, when we start retrying checksum errors */
@@ -564,14 +564,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
return JOURNAL_ENTRY_BAD;
}
- if (fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c,
- "invalid journal entry: last_seq > seq"))
+ if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq),
+ c, "invalid journal entry: last_seq > seq"))
j->last_seq = j->seq;
for_each_jset_entry(entry, j) {
struct bkey_i *k;
- if (fsck_err_on(jset_keys_next(entry) >
+ if (mustfix_fsck_err_on(jset_keys_next(entry) >
bkey_idx(j, le32_to_cpu(j->u64s)), c,
"journal entry extents past end of jset")) {
j->u64s = cpu_to_le64((u64 *) entry - j->_data);
@@ -595,7 +595,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
case JOURNAL_ENTRY_BTREE_ROOT:
k = entry->start;
- if (fsck_err_on(!entry->u64s ||
+ if (mustfix_fsck_err_on(!entry->u64s ||
le16_to_cpu(entry->u64s) != k->k.u64s, c,
"invalid btree root journal entry: wrong number of keys")) {
journal_entry_null_range(entry,
@@ -613,7 +613,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
break;
case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED:
- if (fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
+ if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c,
"invalid journal seq blacklist entry: bad size")) {
journal_entry_null_range(entry,
jset_keys_next(entry));
@@ -621,7 +621,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto
break;
default:
- fsck_err(c, "invalid journal entry type %llu",
+ mustfix_fsck_err(c, "invalid journal entry type %llu",
JOURNAL_ENTRY_TYPE(entry));
journal_entry_null_range(entry, jset_keys_next(entry));
break;
@@ -2065,6 +2065,13 @@ static void journal_write(struct closure *cl)
bch_check_mark_super(c, &j->key, true);
+ /*
+ * XXX: we really should just disable the entire journal in nochanges
+ * mode
+ */
+ if (c->opts.nochanges)
+ goto no_io;
+
extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
rcu_read_lock();
ca = PTR_CACHE(c, ptr);
@@ -2094,8 +2101,6 @@ static void journal_write(struct closure *cl)
trace_bcache_journal_write(bio);
closure_bio_submit_punt(bio, cl, c);
- ptr->offset += sectors;
-
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
}
@@ -2114,6 +2119,10 @@ static void journal_write(struct closure *cl)
closure_bio_submit_punt(bio, cl, c);
}
+no_io:
+ extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
+ ptr->offset += sectors;
+
closure_return_with_destructor(cl, journal_write_done);
}
diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c
index 3c85d49..cb4f165 100644
--- a/libbcache/movinggc.c
+++ b/libbcache/movinggc.c
@@ -26,14 +26,11 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca,
{
const struct bch_extent_ptr *ptr;
- if (bkey_extent_is_data(k.k)) {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-
- extent_for_each_ptr(e, ptr)
- if ((ca->sb.nr_this_dev == ptr->dev) &&
- PTR_BUCKET(ca, ptr)->mark.copygc)
- return ptr;
- }
+ if (bkey_extent_is_data(k.k) &&
+ (ptr = bch_extent_has_device(bkey_s_c_to_extent(k),
+ ca->sb.nr_this_dev)) &&
+ PTR_BUCKET(ca, ptr)->mark.copygc)
+ return ptr;
return NULL;
}
@@ -274,6 +271,9 @@ int bch_moving_gc_thread_start(struct cache *ca)
/* The moving gc read thread must be stopped */
BUG_ON(ca->moving_gc_read != NULL);
+ if (ca->set->opts.nochanges)
+ return 0;
+
if (cache_set_init_fault("moving_gc_start"))
return -ENOMEM;
diff --git a/libbcache/opts.c b/libbcache/opts.c
index 249dd5d..60a2a4d 100644
--- a/libbcache/opts.c
+++ b/libbcache/opts.c
@@ -4,16 +4,6 @@
#include "opts.h"
#include "util.h"
-const char * const bch_bool_opt[] = {
- "0",
- "1",
- NULL
-};
-
-const char * const bch_uint_opt[] = {
- NULL
-};
-
const char * const bch_error_actions[] = {
"continue",
"remount-ro",
@@ -43,6 +33,42 @@ const char * const bch_str_hash_types[] = {
NULL
};
+const char * const bch_cache_replacement_policies[] = {
+ "lru",
+ "fifo",
+ "random",
+ NULL
+};
+
+/* Default is -1; we skip past it for struct cached_dev's cache mode */
+const char * const bch_cache_modes[] = {
+ "default",
+ "writethrough",
+ "writeback",
+ "writearound",
+ "none",
+ NULL
+};
+
+const char * const bch_cache_state[] = {
+ "active",
+ "readonly",
+ "failed",
+ "spare",
+ NULL
+};
+
+
+const char * const bch_bool_opt[] = {
+ "0",
+ "1",
+ NULL
+};
+
+const char * const bch_uint_opt[] = {
+ NULL
+};
+
enum bch_opts {
#define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \
Opt_##_name,
diff --git a/libbcache/opts.h b/libbcache/opts.h
index 1d19ac6..70df232 100644
--- a/libbcache/opts.h
+++ b/libbcache/opts.h
@@ -6,6 +6,14 @@
#include <linux/log2.h>
#include <linux/string.h>
+extern const char * const bch_error_actions[];
+extern const char * const bch_csum_types[];
+extern const char * const bch_compression_types[];
+extern const char * const bch_str_hash_types[];
+extern const char * const bch_cache_replacement_policies[];
+extern const char * const bch_cache_modes[];
+extern const char * const bch_cache_state[];
+
/*
* Mount options; we also store defaults in the superblock.
*
@@ -20,10 +28,6 @@
extern const char * const bch_bool_opt[];
extern const char * const bch_uint_opt[];
-extern const char * const bch_error_actions[];
-extern const char * const bch_csum_types[];
-extern const char * const bch_compression_types[];
-extern const char * const bch_str_hash_types[];
/* dummy option, for options that aren't stored in the superblock */
LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0);
@@ -44,6 +48,15 @@ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0);
CACHE_SET_OPT(fix_errors, \
bch_bool_opt, 0, 2, \
NO_SB_OPT, true) \
+ CACHE_SET_OPT(nochanges, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ CACHE_SET_OPT(noreplay, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
+ CACHE_SET_OPT(norecovery, \
+ bch_bool_opt, 0, 2, \
+ NO_SB_OPT, 0) \
CACHE_SET_SB_OPTS()
#define CACHE_SET_OPTS() \
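
The three new options slot into the CACHE_SET_VISIBLE_OPTS X-macro table; each entry is expanded under different CACHE_SET_OPT definitions, for example the opts.c hunk above that builds the Opt_* enum. A standalone miniature of the X-macro technique (names invented):

    #include <stdio.h>

    /* single source of truth: name, default */
    #define OPTS()            \
        OPT(nochanges, 0)     \
        OPT(noreplay, 0)      \
        OPT(norecovery, 0)

    /* expansion 1: an enum of option ids */
    enum opt_id {
    #define OPT(_name, _def) Opt_##_name,
        OPTS()
    #undef OPT
        Opt_nr
    };

    /* expansion 2: a table of names and defaults */
    static const struct { const char *name; int def; } opt_table[] = {
    #define OPT(_name, _def) { #_name, _def },
        OPTS()
    #undef OPT
    };

    int main(void)
    {
        for (int i = 0; i < Opt_nr; i++)
            printf("%d: %s = %d\n", i, opt_table[i].name, opt_table[i].def);
        return 0;
    }
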
diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h
index 9a718a8..a489304 100644
--- a/libbcache/str_hash.h
+++ b/libbcache/str_hash.h
@@ -79,6 +79,14 @@ struct bch_hash_info {
u8 type;
};
+static inline struct bch_hash_info bch_hash_info_init(const struct bch_inode *bi)
+{
+ return (struct bch_hash_info) {
+ .seed = le64_to_cpu(bi->i_hash_seed),
+ .type = INODE_STR_HASH_TYPE(bi),
+ };
+}
+
struct bch_hash_desc {
enum btree_id btree_id;
u8 key_type;
diff --git a/libbcache/super.c b/libbcache/super.c
index 5f6a85e..296700b 100644
--- a/libbcache/super.c
+++ b/libbcache/super.c
@@ -99,14 +99,17 @@ static bool bch_is_open(struct block_device *bdev)
}
static const char *bch_blkdev_open(const char *path, void *holder,
+ struct cache_set_opts opts,
struct block_device **ret)
{
struct block_device *bdev;
+ fmode_t mode = opts.nochanges > 0
+ ? FMODE_READ
+ : FMODE_READ|FMODE_WRITE|FMODE_EXCL;
const char *err;
*ret = NULL;
- bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- holder);
+ bdev = blkdev_get_by_path(path, mode, holder);
if (bdev == ERR_PTR(-EBUSY)) {
bdev = lookup_bdev(path);
@@ -369,6 +372,7 @@ int bch_super_realloc(struct bcache_superblock *sb, unsigned u64s)
}
static const char *read_super(struct bcache_superblock *sb,
+ struct cache_set_opts opts,
const char *path)
{
const char *err;
@@ -378,7 +382,7 @@ static const char *read_super(struct bcache_superblock *sb,
memset(sb, 0, sizeof(*sb));
- err = bch_blkdev_open(path, &sb, &sb->bdev);
+ err = bch_blkdev_open(path, &sb, opts, &sb->bdev);
if (err)
return err;
retry:
@@ -614,6 +618,9 @@ static void __bcache_write_super(struct cache_set *c)
closure_init(cl, &c->cl);
+ if (c->opts.nochanges)
+ goto no_io;
+
le64_add_cpu(&c->disk_sb.seq, 1);
for_each_cache(ca, c, i) {
@@ -636,7 +643,7 @@ static void __bcache_write_super(struct cache_set *c)
percpu_ref_get(&ca->ref);
__write_super(c, &ca->disk_sb);
}
-
+no_io:
closure_return_with_destructor(cl, bcache_write_super_unlock);
}
@@ -1147,6 +1154,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb,
c->opts = cache_superblock_opts(sb);
cache_set_opts_apply(&c->opts, opts);
+ c->opts.nochanges |= c->opts.noreplay;
+ c->opts.read_only |= c->opts.nochanges;
+
c->block_bits = ilog2(c->sb.block_size);
if (cache_set_init_fault("cache_set_alloc"))
@@ -1339,6 +1349,9 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_initial_gc(c, &journal))
goto err;
+ if (c->opts.noreplay)
+ goto recovery_done;
+
bch_verbose(c, "mark and sweep done");
/*
@@ -1365,6 +1378,9 @@ static const char *run_cache_set(struct cache_set *c)
bch_verbose(c, "journal replay done");
+ if (c->opts.norecovery)
+ goto recovery_done;
+
/*
* Write a new journal entry _before_ we start journalling new
* data - otherwise, we could end up with btree node bsets with
@@ -1376,21 +1392,12 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_journal_meta(&c->journal))
goto err;
- bch_verbose(c, "starting fs gc:");
- err = "error in fs gc";
- ret = bch_gc_inode_nlinks(c);
+ bch_verbose(c, "starting fsck:");
+ err = "error in fsck";
+ ret = bch_fsck(c, !c->opts.nofsck);
if (ret)
goto err;
- bch_verbose(c, "fs gc done");
-
- if (!c->opts.nofsck) {
- bch_verbose(c, "starting fsck:");
- err = "error in fsck";
- ret = bch_fsck(c);
- if (ret)
- goto err;
- bch_verbose(c, "fsck done");
- }
+ bch_verbose(c, "fsck done");
} else {
struct bkey_i_inode inode;
struct closure cl;
@@ -1433,12 +1440,9 @@ static const char *run_cache_set(struct cache_set *c)
/* Wait for new btree roots to be written: */
closure_sync(&cl);
- bkey_inode_init(&inode.k_i);
+ bch_inode_init(c, &inode, 0, 0,
+ S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0);
inode.k.p.inode = BCACHE_ROOT_INO;
- inode.v.i_mode = cpu_to_le16(S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO);
- inode.v.i_nlink = cpu_to_le32(2);
- get_random_bytes(&inode.v.i_hash_seed, sizeof(inode.v.i_hash_seed));
- SET_INODE_STR_HASH_TYPE(&inode.v, c->sb.str_hash_type);
err = "error creating root directory";
if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i,
@@ -1449,7 +1453,7 @@ static const char *run_cache_set(struct cache_set *c)
if (bch_journal_meta(&c->journal))
goto err;
}
-
+recovery_done:
if (c->opts.read_only) {
bch_cache_set_read_only_sync(c);
} else {
@@ -1485,12 +1489,12 @@ static const char *run_cache_set(struct cache_set *c)
set_bit(CACHE_SET_RUNNING, &c->flags);
bch_attach_backing_devs(c);
- closure_put(&c->caching);
-
bch_notify_cache_set_read_write(c);
-
- BUG_ON(!list_empty(&journal));
- return NULL;
+ err = NULL;
+out:
+ bch_journal_entries_free(&journal);
+ closure_put(&c->caching);
+ return err;
err:
switch (ret) {
case BCH_FSCK_ERRORS_NOT_FIXED:
@@ -1519,12 +1523,8 @@ err:
}
BUG_ON(!err);
-
- bch_journal_entries_free(&journal);
set_bit(CACHE_SET_ERROR, &c->flags);
- bch_cache_set_unregister(c);
- closure_put(&c->caching);
- return err;
+ goto out;
}
static const char *can_add_cache(struct cache_sb *sb,
@@ -2056,8 +2056,9 @@ static const char *register_cache(struct bcache_superblock *sb,
struct cache_set_opts opts)
{
char name[BDEVNAME_SIZE];
- const char *err = "cannot allocate memory";
+ const char *err;
struct cache_set *c;
+ bool allocated_cache_set = false;
err = validate_cache_super(sb);
if (err)
@@ -2067,41 +2068,36 @@ static const char *register_cache(struct bcache_superblock *sb,
c = cache_set_lookup(sb->sb->set_uuid);
if (c) {
- if ((err = (can_attach_cache(sb->sb, c) ?:
- cache_alloc(sb, c, NULL))))
+ err = can_attach_cache(sb->sb, c);
+ if (err)
return err;
+ } else {
+ c = bch_cache_set_alloc(sb->sb, opts);
+ if (!c)
+ return "cannot allocate memory";
- if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) {
- err = run_cache_set(c);
- if (err)
- return err;
- }
- goto out;
+ allocated_cache_set = true;
}
- c = bch_cache_set_alloc(sb->sb, opts);
- if (!c)
- return err;
-
err = cache_alloc(sb, c, NULL);
if (err)
- goto err_stop;
+ goto err;
if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) {
err = run_cache_set(c);
if (err)
- goto err_stop;
+ goto err;
+ } else {
+ err = "error creating kobject";
+ if (bch_cache_set_online(c))
+ goto err;
}
- err = "error creating kobject";
- if (bch_cache_set_online(c))
- goto err_stop;
-out:
-
bch_info(c, "started");
return NULL;
-err_stop:
- bch_cache_set_stop(c);
+err:
+ if (allocated_cache_set)
+ bch_cache_set_stop(c);
return err;
}
@@ -2117,7 +2113,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path)
mutex_lock(&bch_register_lock);
- err = read_super(&sb, path);
+ err = read_super(&sb, c->opts, path);
if (err)
goto err_unlock;
@@ -2261,7 +2257,7 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
mutex_lock(&bch_register_lock);
for (i = 0; i < nr_devices; i++) {
- err = read_super(&sb[i], devices[i]);
+ err = read_super(&sb[i], opts, devices[i]);
if (err)
goto err_unlock;
@@ -2312,6 +2308,8 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices,
out:
kfree(sb);
module_put(THIS_MODULE);
+ if (err)
+ c = NULL;
return err;
err_unlock:
if (c)
@@ -2326,18 +2324,19 @@ err:
const char *bch_register_one(const char *path)
{
struct bcache_superblock sb;
+ struct cache_set_opts opts = cache_set_opts_empty();
const char *err;
mutex_lock(&bch_register_lock);
- err = read_super(&sb, path);
+ err = read_super(&sb, opts, path);
if (err)
goto err;
if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version)))
err = bch_backing_dev_register(&sb);
else
- err = register_cache(&sb, cache_set_opts_empty());
+ err = register_cache(&sb, opts);
free_super(&sb);
err:
diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c
index 40d006b..58a7125 100644
--- a/libbcache/sysfs.c
+++ b/libbcache/sysfs.c
@@ -24,31 +24,6 @@
#include <linux/blkdev.h>
#include <linux/sort.h>
-static const char * const cache_replacement_policies[] = {
- "lru",
- "fifo",
- "random",
- NULL
-};
-
-/* Default is -1; we skip past it for struct cached_dev's cache mode */
-static const char * const bch_cache_modes[] = {
- "default",
- "writethrough",
- "writeback",
- "writearound",
- "none",
- NULL
-};
-
-static const char * const bch_cache_state[] = {
- "active",
- "readonly",
- "failed",
- "spare",
- NULL
-};
-
write_attribute(attach);
write_attribute(detach);
write_attribute(unregister);
@@ -1237,7 +1212,7 @@ SHOW(bch_cache)
if (attr == &sysfs_cache_replacement_policy)
return bch_snprint_string_list(buf, PAGE_SIZE,
- cache_replacement_policies,
+ bch_cache_replacement_policies,
ca->mi.replacement);
sysfs_print(tier, ca->mi.tier);
@@ -1281,7 +1256,7 @@ STORE(__bch_cache)
}
if (attr == &sysfs_cache_replacement_policy) {
- ssize_t v = bch_read_string_list(buf, cache_replacement_policies);
+ ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies);
if (v < 0)
return v;
diff --git a/libbcache/tier.c b/libbcache/tier.c
index 2b568e1..39b04f7 100644
--- a/libbcache/tier.c
+++ b/libbcache/tier.c
@@ -224,6 +224,9 @@ int bch_tiering_read_start(struct cache_set *c)
{
struct task_struct *t;
+ if (c->opts.nochanges)
+ return 0;
+
t = kthread_create(bch_tiering_thread, c, "bch_tier_read");
if (IS_ERR(t))
return PTR_ERR(t);