diff options
Diffstat (limited to 'libbcache')
-rw-r--r-- | libbcache/alloc.c | 3 | ||||
-rw-r--r-- | libbcache/bcache.h | 5 | ||||
-rw-r--r-- | libbcache/bkey_methods.c | 10 | ||||
-rw-r--r-- | libbcache/bkey_methods.h | 2 | ||||
-rw-r--r-- | libbcache/btree_cache.c | 56 | ||||
-rw-r--r-- | libbcache/btree_cache.h | 10 | ||||
-rw-r--r-- | libbcache/btree_io.c | 7 | ||||
-rw-r--r-- | libbcache/btree_types.h | 3 | ||||
-rw-r--r-- | libbcache/btree_update.h | 7 | ||||
-rw-r--r-- | libbcache/debug.c | 60 | ||||
-rw-r--r-- | libbcache/dirent.c | 49 | ||||
-rw-r--r-- | libbcache/dirent.h | 12 | ||||
-rw-r--r-- | libbcache/error.h | 63 | ||||
-rw-r--r-- | libbcache/extents.c | 7 | ||||
-rw-r--r-- | libbcache/extents.h | 9 | ||||
-rw-r--r-- | libbcache/fs-gc.c | 710 | ||||
-rw-r--r-- | libbcache/fs-gc.h | 3 | ||||
-rw-r--r-- | libbcache/fs.c | 45 | ||||
-rw-r--r-- | libbcache/inode.c | 32 | ||||
-rw-r--r-- | libbcache/inode.h | 2 | ||||
-rw-r--r-- | libbcache/io.c | 5 | ||||
-rw-r--r-- | libbcache/journal.c | 37 | ||||
-rw-r--r-- | libbcache/movinggc.c | 16 | ||||
-rw-r--r-- | libbcache/opts.c | 46 | ||||
-rw-r--r-- | libbcache/opts.h | 21 | ||||
-rw-r--r-- | libbcache/str_hash.h | 8 | ||||
-rw-r--r-- | libbcache/super.c | 117 | ||||
-rw-r--r-- | libbcache/sysfs.c | 29 | ||||
-rw-r--r-- | libbcache/tier.c | 3 |
29 files changed, 959 insertions, 418 deletions
diff --git a/libbcache/alloc.c b/libbcache/alloc.c index cff750c..4fe08b5 100644 --- a/libbcache/alloc.c +++ b/libbcache/alloc.c @@ -254,6 +254,9 @@ static int bch_prio_write(struct cache *ca) bool need_new_journal_entry; int i, ret; + if (c->opts.nochanges) + return 0; + trace_bcache_prio_write_start(ca); atomic64_add(ca->mi.bucket_size * prio_buckets(ca), diff --git a/libbcache/bcache.h b/libbcache/bcache.h index 9a43a69..309d372 100644 --- a/libbcache/bcache.h +++ b/libbcache/bcache.h @@ -210,8 +210,9 @@ #define bch_meta_write_fault(name) \ dynamic_fault("bcache:meta:write:" name) -#define bch_fmt(_c, fmt) \ - "bcache (%s): " fmt "\n", ((_c)->name) +#ifndef bch_fmt +#define bch_fmt(_c, fmt) "bcache (%s): " fmt "\n", ((_c)->name) +#endif #define bch_info(c, fmt, ...) \ printk(KERN_INFO bch_fmt(c, fmt), ##__VA_ARGS__) diff --git a/libbcache/bkey_methods.c b/libbcache/bkey_methods.c index 3bcd0e0..90f7e5f 100644 --- a/libbcache/bkey_methods.c +++ b/libbcache/bkey_methods.c @@ -89,6 +89,16 @@ void bkey_debugcheck(struct cache_set *c, struct btree *b, struct bkey_s_c k) ops->key_debugcheck(c, b, k); } +void bch_val_to_text(struct cache_set *c, enum bkey_type type, + char *buf, size_t size, struct bkey_s_c k) +{ + const struct bkey_ops *ops = bch_bkey_ops[type]; + + if (k.k->type >= KEY_TYPE_GENERIC_NR && + ops->val_to_text) + ops->val_to_text(c, buf, size, k); +} + void bch_bkey_val_to_text(struct cache_set *c, enum bkey_type type, char *buf, size_t size, struct bkey_s_c k) { diff --git a/libbcache/bkey_methods.h b/libbcache/bkey_methods.h index 0e305eb..c1f0dc5 100644 --- a/libbcache/bkey_methods.h +++ b/libbcache/bkey_methods.h @@ -67,6 +67,8 @@ const char *btree_bkey_invalid(struct cache_set *, struct btree *, struct bkey_s_c); void bkey_debugcheck(struct cache_set *, struct btree *, struct bkey_s_c); +void bch_val_to_text(struct cache_set *, enum bkey_type, + char *, size_t, struct bkey_s_c); void bch_bkey_val_to_text(struct cache_set *, enum bkey_type, char *, size_t, struct bkey_s_c); diff --git a/libbcache/btree_cache.c b/libbcache/btree_cache.c index 0994190..ca6064a 100644 --- a/libbcache/btree_cache.c +++ b/libbcache/btree_cache.c @@ -149,7 +149,8 @@ static int mca_reap_notrace(struct cache_set *c, struct btree *b, bool flush) if (!six_trylock_write(&b->lock)) goto out_unlock_intent; - if (btree_node_write_error(b)) + if (btree_node_write_error(b) || + btree_node_noevict(b)) goto out_unlock; if (!list_empty(&b->write_blocked)) @@ -699,3 +700,56 @@ retry: return b; } + +int bch_print_btree_node(struct cache_set *c, struct btree *b, + char *buf, size_t len) +{ + const struct bkey_format *f = &b->format; + struct bset_stats stats; + char ptrs[100]; + + memset(&stats, 0, sizeof(stats)); + + bch_val_to_text(c, BKEY_TYPE_BTREE, ptrs, sizeof(ptrs), + bkey_i_to_s_c(&b->key)); + bch_btree_keys_stats(b, &stats); + + return scnprintf(buf, len, + "l %u %llu:%llu - %llu:%llu:\n" + " ptrs: %s\n" + " format: u64s %u fields %u %u %u %u %u\n" + " unpack fn len: %u\n" + " bytes used %zu/%zu (%zu%% full)\n" + " sib u64s: %u, %u (merge threshold %zu)\n" + " nr packed keys %u\n" + " nr unpacked keys %u\n" + " floats %zu\n" + " failed unpacked %zu\n" + " failed prev %zu\n" + " failed overflow %zu\n", + b->level, + b->data->min_key.inode, + b->data->min_key.offset, + b->data->max_key.inode, + b->data->max_key.offset, + ptrs, + f->key_u64s, + f->bits_per_field[0], + f->bits_per_field[1], + f->bits_per_field[2], + f->bits_per_field[3], + f->bits_per_field[4], + b->unpack_fn_len, + b->nr.live_u64s * sizeof(u64), + btree_bytes(c) - sizeof(struct btree_node), + b->nr.live_u64s * 100 / btree_max_u64s(c), + b->sib_u64s[0], + b->sib_u64s[1], + BTREE_FOREGROUND_MERGE_THRESHOLD(c), + b->nr.packed_keys, + b->nr.unpacked_keys, + stats.floats, + stats.failed_unpacked, + stats.failed_prev, + stats.failed_overflow); +} diff --git a/libbcache/btree_cache.h b/libbcache/btree_cache.h index e745abb..c26489d 100644 --- a/libbcache/btree_cache.h +++ b/libbcache/btree_cache.h @@ -56,6 +56,16 @@ static inline unsigned btree_blocks(struct cache_set *c) return c->sb.btree_node_size >> c->block_bits; } +#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4) + +#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) +#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ + (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ + (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) + #define btree_node_root(_c, _b) ((_c)->btree_roots[(_b)->btree_id].b) +int bch_print_btree_node(struct cache_set *, struct btree *, + char *, size_t); + #endif /* _BCACHE_BTREE_CACHE_H */ diff --git a/libbcache/btree_io.c b/libbcache/btree_io.c index ff976b5..4c295af 100644 --- a/libbcache/btree_io.c +++ b/libbcache/btree_io.c @@ -200,7 +200,7 @@ static unsigned sort_extent_whiteouts(struct bkey_packed *dst, const struct bkey_format *f = &iter->b->format; struct bkey_packed *in, *out = dst; struct bkey_i l, r; - bool prev = false, l_packed; + bool prev = false, l_packed = false; u64 max_packed_size = bkey_field_max(f, BKEY_FIELD_SIZE); u64 max_packed_offset = bkey_field_max(f, BKEY_FIELD_OFFSET); u64 new_size; @@ -1443,8 +1443,9 @@ void __bch_btree_node_write(struct cache_set *c, struct btree *b, * Make sure to update b->written so bch_btree_init_next() doesn't * break: */ - if (bch_journal_error(&c->journal)) { - set_btree_node_write_error(b); + if (bch_journal_error(&c->journal) || + c->opts.nochanges) { + set_btree_node_noevict(b); b->written += sectors_to_write; btree_bounce_free(c, order, used_mempool, data); diff --git a/libbcache/btree_types.h b/libbcache/btree_types.h index 3632a04..176d42a 100644 --- a/libbcache/btree_types.h +++ b/libbcache/btree_types.h @@ -2,6 +2,7 @@ #define _BCACHE_BTREE_TYPES_H #include <linux/bcache.h> +#include <linux/kernel.h> #include <linux/list.h> #include <linux/rhashtable.h> #include <linux/semaphore.h> @@ -138,6 +139,7 @@ enum btree_flags { BTREE_NODE_read_error, BTREE_NODE_write_error, BTREE_NODE_dirty, + BTREE_NODE_noevict, BTREE_NODE_write_idx, BTREE_NODE_accessed, BTREE_NODE_write_in_flight, @@ -147,6 +149,7 @@ enum btree_flags { BTREE_FLAG(read_error); BTREE_FLAG(write_error); BTREE_FLAG(dirty); +BTREE_FLAG(noevict); BTREE_FLAG(write_idx); BTREE_FLAG(accessed); BTREE_FLAG(write_in_flight); diff --git a/libbcache/btree_update.h b/libbcache/btree_update.h index 0154441..5fc1b1a 100644 --- a/libbcache/btree_update.h +++ b/libbcache/btree_update.h @@ -11,13 +11,6 @@ struct bkey_format_state; struct bkey_format; struct btree; -#define BTREE_SPLIT_THRESHOLD(c) (btree_blocks(c) * 3 / 4) - -#define BTREE_FOREGROUND_MERGE_THRESHOLD(c) (btree_max_u64s(c) * 1 / 3) -#define BTREE_FOREGROUND_MERGE_HYSTERESIS(c) \ - (BTREE_FOREGROUND_MERGE_THRESHOLD(c) + \ - (BTREE_FOREGROUND_MERGE_THRESHOLD(c) << 2)) - static inline void btree_node_reset_sib_u64s(struct btree *b) { b->sib_u64s[0] = b->nr.live_u64s; diff --git a/libbcache/debug.c b/libbcache/debug.c index 1be2e60..39f5550 100644 --- a/libbcache/debug.c +++ b/libbcache/debug.c @@ -46,6 +46,9 @@ void __bch_btree_verify(struct cache_set *c, struct btree *b) struct bio *bio; struct closure cl; + if (c->opts.nochanges) + return; + closure_init_stack(&cl); btree_node_io_lock(b); @@ -296,55 +299,6 @@ static const struct file_operations btree_debug_ops = { .read = bch_read_btree, }; -static int print_btree_node(struct dump_iter *i, struct btree *b) -{ - const struct bkey_format *f = &b->format; - struct bset_stats stats; - - memset(&stats, 0, sizeof(stats)); - - bch_btree_keys_stats(b, &stats); - - i->bytes = scnprintf(i->buf, sizeof(i->buf), - "l %u %llu:%llu - %llu:%llu:\n" - " format: u64s %u fields %u %u %u %u %u\n" - " unpack fn len: %u\n" - " bytes used %zu/%zu (%zu%% full)\n" - " sib u64s: %u, %u (merge threshold %zu)\n" - " nr packed keys %u\n" - " nr unpacked keys %u\n" - " floats %zu\n" - " failed unpacked %zu\n" - " failed prev %zu\n" - " failed overflow %zu\n", - b->level, - b->data->min_key.inode, - b->data->min_key.offset, - b->data->max_key.inode, - b->data->max_key.offset, - f->key_u64s, - f->bits_per_field[0], - f->bits_per_field[1], - f->bits_per_field[2], - f->bits_per_field[3], - f->bits_per_field[4], - b->unpack_fn_len, - b->nr.live_u64s * sizeof(u64), - btree_bytes(i->c) - sizeof(struct btree_node), - b->nr.live_u64s * 100 / btree_max_u64s(i->c), - b->sib_u64s[0], - b->sib_u64s[1], - BTREE_FOREGROUND_MERGE_THRESHOLD(i->c), - b->nr.packed_keys, - b->nr.unpacked_keys, - stats.floats, - stats.failed_unpacked, - stats.failed_prev, - stats.failed_overflow); - - return flush_buf(i); -} - static ssize_t bch_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -365,7 +319,9 @@ static ssize_t bch_read_btree_formats(struct file *file, char __user *buf, return i->ret; for_each_btree_node(&iter, i->c, i->id, i->from, 0, b) { - err = print_btree_node(i, b); + i->bytes = bch_print_btree_node(i->c, b, i->buf, + sizeof(i->buf)); + err = flush_buf(i); if (err) break; @@ -421,7 +377,9 @@ static ssize_t bch_read_bfloat_failed(struct file *file, char __user *buf, struct bkey_packed *_k = bch_btree_node_iter_peek(node_iter, b); if (iter.nodes[0] != prev_node) { - err = print_btree_node(i, iter.nodes[0]); + i->bytes = bch_print_btree_node(i->c, b, i->buf, + sizeof(i->buf)); + err = flush_buf(i); if (err) break; } diff --git a/libbcache/dirent.c b/libbcache/dirent.c index 920ad2f..d97c3b2 100644 --- a/libbcache/dirent.c +++ b/libbcache/dirent.c @@ -10,7 +10,7 @@ #include <linux/dcache.h> -static unsigned dirent_name_bytes(struct bkey_s_c_dirent d) +unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent d) { unsigned len = bkey_val_bytes(d.k) - sizeof(struct bch_dirent); @@ -61,7 +61,7 @@ static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key) static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); - struct qstr name = QSTR_INIT(d.v->d_name, dirent_name_bytes(d)); + struct qstr name = QSTR_INIT(d.v->d_name, bch_dirent_name_bytes(d)); return bch_dirent_hash(info, &name); } @@ -69,7 +69,7 @@ static u64 dirent_hash_bkey(const struct bch_hash_info *info, struct bkey_s_c k) static bool dirent_cmp_key(struct bkey_s_c _l, const void *_r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); - int len = dirent_name_bytes(l); + int len = bch_dirent_name_bytes(l); const struct qstr *r = _r; return len - r->len ?: memcmp(l.v->d_name, r->name, len); @@ -79,8 +79,8 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) { struct bkey_s_c_dirent l = bkey_s_c_to_dirent(_l); struct bkey_s_c_dirent r = bkey_s_c_to_dirent(_r); - int l_len = dirent_name_bytes(l); - int r_len = dirent_name_bytes(r); + int l_len = bch_dirent_name_bytes(l); + int r_len = bch_dirent_name_bytes(r); return l_len - r_len ?: memcmp(l.v->d_name, r.v->d_name, l_len); } @@ -125,7 +125,7 @@ static void bch_dirent_to_text(struct cache_set *c, char *buf, if (size) { unsigned n = min_t(unsigned, size, - dirent_name_bytes(d)); + bch_dirent_name_bytes(d)); memcpy(buf, d.v->d_name, n); buf[size - 1] = '\0'; buf += n; @@ -167,15 +167,16 @@ static struct bkey_i_dirent *dirent_create_key(u8 type, bkey_val_bytes(&dirent->k) - (sizeof(struct bch_dirent) + name->len)); - EBUG_ON(dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); + EBUG_ON(bch_dirent_name_bytes(dirent_i_to_s_c(dirent)) != name->len); return dirent; } -int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type, - const struct qstr *name, u64 dst_inum) +int bch_dirent_create(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, + u8 type, const struct qstr *name, u64 dst_inum, + u64 *journal_seq, int flags) { - struct bch_inode_info *ei = to_bch_ei(dir); struct bkey_i_dirent *dirent; int ret; @@ -183,9 +184,8 @@ int bch_dirent_create(struct cache_set *c, struct inode *dir, u8 type, if (!dirent) return -ENOMEM; - ret = bch_hash_set(dirent_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &ei->journal_seq, - &dirent->k_i, BCH_HASH_SET_MUST_CREATE); + ret = bch_hash_set(dirent_hash_desc, hash_info, c, dir_inum, + journal_seq, &dirent->k_i, flags); kfree(dirent); return ret; @@ -346,26 +346,25 @@ err: return ret; } -int bch_dirent_delete(struct cache_set *c, struct inode *dir, - const struct qstr *name) +int bch_dirent_delete(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, + const struct qstr *name, + u64 *journal_seq) { - struct bch_inode_info *ei = to_bch_ei(dir); - - return bch_hash_delete(dirent_hash_desc, &ei->str_hash, - c, ei->vfs_inode.i_ino, - &ei->journal_seq, name); + return bch_hash_delete(dirent_hash_desc, hash_info, + c, dir_inum, journal_seq, name); } -u64 bch_dirent_lookup(struct cache_set *c, struct inode *dir, +u64 bch_dirent_lookup(struct cache_set *c, u64 dir_inum, + const struct bch_hash_info *hash_info, const struct qstr *name) { - struct bch_inode_info *ei = to_bch_ei(dir); struct btree_iter iter; struct bkey_s_c k; u64 inum; - k = bch_hash_lookup(dirent_hash_desc, &ei->str_hash, c, - ei->vfs_inode.i_ino, &iter, name); + k = bch_hash_lookup(dirent_hash_desc, hash_info, c, + dir_inum, &iter, name); if (IS_ERR(k.k)) { bch_btree_iter_unlock(&iter); return 0; @@ -428,7 +427,7 @@ int bch_readdir(struct cache_set *c, struct file *file, if (k.k->p.inode > inode->i_ino) break; - len = dirent_name_bytes(dirent); + len = bch_dirent_name_bytes(dirent); pr_debug("emitting %s", dirent.v->d_name); diff --git a/libbcache/dirent.h b/libbcache/dirent.h index e18089b..cc67d55 100644 --- a/libbcache/dirent.h +++ b/libbcache/dirent.h @@ -7,10 +7,13 @@ struct qstr; struct file; struct dir_context; struct cache_set; +struct bch_hash_info; -int bch_dirent_create(struct cache_set *c, struct inode *, u8, - const struct qstr *, u64); -int bch_dirent_delete(struct cache_set *c, struct inode *, const struct qstr *); +unsigned bch_dirent_name_bytes(struct bkey_s_c_dirent); +int bch_dirent_create(struct cache_set *c, u64, const struct bch_hash_info *, + u8, const struct qstr *, u64, u64 *, int); +int bch_dirent_delete(struct cache_set *, u64, const struct bch_hash_info *, + const struct qstr *, u64 *); enum bch_rename_mode { BCH_RENAME, @@ -23,8 +26,9 @@ int bch_dirent_rename(struct cache_set *, struct inode *, const struct qstr *, u64 *, enum bch_rename_mode); -u64 bch_dirent_lookup(struct cache_set *c, struct inode *, +u64 bch_dirent_lookup(struct cache_set *, u64, const struct bch_hash_info *, const struct qstr *); + int bch_empty_dir(struct cache_set *, u64); int bch_readdir(struct cache_set *, struct file *, struct dir_context *); diff --git a/libbcache/error.h b/libbcache/error.h index 9eb9335..33a28c4 100644 --- a/libbcache/error.h +++ b/libbcache/error.h @@ -101,38 +101,51 @@ enum { BCH_FSCK_UNKNOWN_VERSION = 4, }; -#define unfixable_fsck_err(c, msg, ...) \ -do { \ - bch_err(c, msg " (repair unimplemented)", ##__VA_ARGS__); \ - ret = BCH_FSCK_REPAIR_UNIMPLEMENTED; \ - goto fsck_err; \ -} while (0) +/* These macros return true if error should be fixed: */ -#define unfixable_fsck_err_on(cond, c, ...) \ -do { \ - if (cond) \ - unfixable_fsck_err(c, __VA_ARGS__); \ -} while (0) +/* XXX: mark in superblock that filesystem contains errors, if we ignore: */ -#define fsck_err(c, msg, ...) \ -do { \ - if (!(c)->opts.fix_errors) { \ - bch_err(c, msg, ##__VA_ARGS__); \ +#ifndef __fsck_err +#define __fsck_err(c, _can_fix, _can_ignore, _nofix_msg, msg, ...) \ +({ \ + bool _fix = false; \ + \ + if (_can_fix && (c)->opts.fix_errors) { \ + bch_err(c, msg ", fixing", ##__VA_ARGS__); \ + set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ + _fix = true; \ + } else if (_can_ignore && \ + (c)->opts.errors == BCH_ON_ERROR_CONTINUE) { \ + bch_err(c, msg " (ignoring)", ##__VA_ARGS__); \ + } else { \ + bch_err(c, msg " ("_nofix_msg")", ##__VA_ARGS__); \ ret = BCH_FSCK_ERRORS_NOT_FIXED; \ goto fsck_err; \ } \ - set_bit(CACHE_SET_FSCK_FIXED_ERRORS, &(c)->flags); \ - bch_err(c, msg ", fixing", ##__VA_ARGS__); \ -} while (0) - -#define fsck_err_on(cond, c, ...) \ -({ \ - bool _ret = (cond); \ \ - if (_ret) \ - fsck_err(c, __VA_ARGS__); \ - _ret; \ + BUG_ON(!_fix && !_can_ignore); \ + _fix; \ }) +#endif + +#define __fsck_err_on(cond, c, _can_fix, _can_ignore, _nofix_msg, ...) \ + ((cond) ? __fsck_err(c, _can_fix, _can_ignore, \ + _nofix_msg, ##__VA_ARGS__) : false) + +#define unfixable_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "repair unimplemented", ##__VA_ARGS__) + +#define need_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, false, true, "run fsck to correct", ##__VA_ARGS__) + +#define mustfix_fsck_err(c, ...) \ + __fsck_err(c, true, false, "not fixing", ##__VA_ARGS__) + +#define mustfix_fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, true, false, "not fixing", ##__VA_ARGS__) + +#define fsck_err_on(cond, c, ...) \ + __fsck_err_on(cond, c, true, true, "not fixing", ##__VA_ARGS__) /* * Fatal errors: these don't indicate a bug, but we can't continue running in RW diff --git a/libbcache/extents.c b/libbcache/extents.c index 45fa220..c026d59 100644 --- a/libbcache/extents.c +++ b/libbcache/extents.c @@ -108,15 +108,16 @@ struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *dst, /* Common among btree and extent ptrs */ -bool bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev) +const struct bch_extent_ptr * +bch_extent_has_device(struct bkey_s_c_extent e, unsigned dev) { const struct bch_extent_ptr *ptr; extent_for_each_ptr(e, ptr) if (ptr->dev == dev) - return true; + return ptr; - return false; + return NULL; } unsigned bch_extent_nr_ptrs_from(struct bkey_s_c_extent e, diff --git a/libbcache/extents.h b/libbcache/extents.h index 2dc6446..e1cb47a 100644 --- a/libbcache/extents.h +++ b/libbcache/extents.h @@ -1,15 +1,15 @@ #ifndef _BCACHE_EXTENTS_H #define _BCACHE_EXTENTS_H +#include "bcache.h" #include "bkey.h" #include <linux/bcache.h> -struct bch_replace_info; -union bch_extent_crc; -struct btree_iter; +struct btree_node_iter; struct btree_insert; struct btree_insert_entry; +struct extent_insert_hook; struct btree_nr_keys bch_key_sort_fix_overlapping(struct bset *, struct btree *, @@ -485,7 +485,8 @@ static inline void bch_extent_drop_ptr(struct bkey_s_extent e, bch_extent_drop_redundant_crcs(e); } -bool bch_extent_has_device(struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch_extent_has_device(struct bkey_s_c_extent, unsigned); bool bch_cut_front(struct bpos, struct bkey_i *); bool bch_cut_back(struct bpos, struct bkey *); diff --git a/libbcache/fs-gc.c b/libbcache/fs-gc.c index bd2a867..1dec230 100644 --- a/libbcache/fs-gc.c +++ b/libbcache/fs-gc.c @@ -11,6 +11,529 @@ #include <linux/generic-radix-tree.h> +#define QSTR(n) { { { .len = strlen(n) } }, .name = n } + +static int remove_dirent(struct cache_set *c, struct btree_iter *iter, + struct bkey_s_c_dirent dirent) +{ + struct qstr name; + struct bkey_i_inode dir_inode; + struct bch_hash_info dir_hash_info; + u64 dir_inum = dirent.k->p.inode; + int ret; + char *buf; + + name.len = bch_dirent_name_bytes(dirent); + buf = kmalloc(name.len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, dirent.v->d_name, name.len); + buf[name.len] = '\0'; + name.name = buf; + + /* Unlock iter so we don't deadlock, after copying name: */ + bch_btree_iter_unlock(iter); + + ret = bch_inode_find_by_inum(c, dir_inum, &dir_inode); + if (ret) + goto err; + + dir_hash_info = bch_hash_info_init(&dir_inode.v); + + ret = bch_dirent_delete(c, dir_inum, &dir_hash_info, &name, NULL); +err: + kfree(buf); + return ret; +} + +static int reattach_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + u64 inum) +{ + struct bch_hash_info lostfound_hash_info = + bch_hash_info_init(&lostfound_inode->v); + char name_buf[20]; + struct qstr name; + int ret; + + snprintf(name_buf, sizeof(name_buf), "%llu", inum); + name = (struct qstr) QSTR(name_buf); + + le32_add_cpu(&lostfound_inode->v.i_nlink, 1); + + ret = bch_btree_insert(c, BTREE_ID_INODES, &lostfound_inode->k_i, + NULL, NULL, NULL, 0); + if (ret) + return ret; + + return bch_dirent_create(c, lostfound_inode->k.p.inode, + &lostfound_hash_info, + DT_DIR, &name, inum, NULL, 0); +} + +struct inode_walker { + bool first_this_inode; + bool have_inode; + u16 i_mode; + u64 i_size; + u64 cur_inum; + struct bkey_i_inode inode; +}; + +static struct inode_walker inode_walker_init(void) +{ + return (struct inode_walker) { + .cur_inum = -1, + .have_inode = false, + }; +} + +static int walk_inode(struct cache_set *c, struct inode_walker *w, u64 inum) +{ + w->first_this_inode = inum != w->cur_inum; + w->cur_inum = inum; + + if (w->first_this_inode) { + int ret = bch_inode_find_by_inum(c, inum, &w->inode); + + if (ret && ret != -ENOENT) + return ret; + + w->have_inode = !ret; + + if (w->have_inode) { + w->i_mode = le16_to_cpu(w->inode.v.i_mode); + w->i_size = le64_to_cpu(w->inode.v.i_size); + } + } + + return 0; +} + +/* + * Walk extents: verify that extents have a corresponding S_ISREG inode, and + * that i_size an i_sectors are consistent + */ +noinline_for_stack +static int check_extents(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + u64 i_sectors; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, + POS(BCACHE_ROOT_INO, 0), k) { + if (k.k->type == KEY_TYPE_DISCARD) + continue; + + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "extent type %u for missing inode %llu", + k.k->type, k.k->p.inode); + + unfixable_fsck_err_on(w.first_this_inode && w.have_inode && + le64_to_cpu(w.inode.v.i_sectors) != + (i_sectors = bch_count_inode_sectors(c, w.cur_inum)), + c, "i_sectors wrong: got %llu, should be %llu", + le64_to_cpu(w.inode.v.i_sectors), i_sectors); + + unfixable_fsck_err_on(w.have_inode && + !S_ISREG(w.i_mode) && !S_ISLNK(w.i_mode), c, + "extent type %u for non regular file, inode %llu mode %o", + k.k->type, k.k->p.inode, w.i_mode); + + unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && + k.k->p.offset > round_up(w.i_size, PAGE_SIZE) >> 9, c, + "extent type %u offset %llu past end of inode %llu, i_size %llu", + k.k->type, k.k->p.offset, k.k->p.inode, w.i_size); + } +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* + * Walk dirents: verify that they all have a corresponding S_ISDIR inode, + * validate d_type + */ +noinline_for_stack +static int check_dirents(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + POS(BCACHE_ROOT_INO, 0), k) { + struct bkey_s_c_dirent d; + struct bkey_i_inode target; + bool have_target; + u64 d_inum; + + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "dirent in nonexisting directory %llu", + k.k->p.inode); + + unfixable_fsck_err_on(!S_ISDIR(w.i_mode), c, + "dirent in non directory inode %llu, type %u", + k.k->p.inode, mode_to_type(w.i_mode)); + + if (k.k->type != BCH_DIRENT) + continue; + + d = bkey_s_c_to_dirent(k); + d_inum = le64_to_cpu(d.v->d_inum); + + if (fsck_err_on(d_inum == d.k->p.inode, c, + "dirent points to own directory")) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + + ret = bch_inode_find_by_inum(c, d_inum, &target); + if (ret && ret != -ENOENT) + break; + + have_target = !ret; + ret = 0; + + if (fsck_err_on(!have_target, c, + "dirent points to missing inode %llu, type %u filename %s", + d_inum, d.v->d_type, d.v->d_name)) { + ret = remove_dirent(c, &iter, d); + if (ret) + goto err; + continue; + } + + if (fsck_err_on(have_target && + d.v->d_type != + mode_to_type(le16_to_cpu(target.v.i_mode)), c, + "incorrect d_type: got %u should be %u, filename %s", + d.v->d_type, + mode_to_type(le16_to_cpu(target.v.i_mode)), + d.v->d_name)) { + struct bkey_i_dirent *n; + + n = kmalloc(bkey_bytes(d.k), GFP_KERNEL); + if (!n) { + ret = -ENOMEM; + goto err; + } + + bkey_reassemble(&n->k_i, d.s_c); + n->v.d_type = mode_to_type(le16_to_cpu(target.v.i_mode)); + + ret = bch_btree_insert_at(c, NULL, NULL, NULL, + BTREE_INSERT_NOFAIL, + BTREE_INSERT_ENTRY(&iter, &n->k_i)); + kfree(n); + if (ret) + goto err; + + } + } +err: +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* + * Walk xattrs: verify that they all have a corresponding inode + */ +noinline_for_stack +static int check_xattrs(struct cache_set *c) +{ + struct inode_walker w = inode_walker_init(); + struct btree_iter iter; + struct bkey_s_c k; + int ret = 0; + + for_each_btree_key(&iter, c, BTREE_ID_XATTRS, + POS(BCACHE_ROOT_INO, 0), k) { + ret = walk_inode(c, &w, k.k->p.inode); + if (ret) + break; + + unfixable_fsck_err_on(!w.have_inode, c, + "xattr for missing inode %llu", + k.k->p.inode); + } +fsck_err: + return bch_btree_iter_unlock(&iter) ?: ret; +} + +/* Get root directory, create if it doesn't exist: */ +static int check_root(struct cache_set *c, struct bkey_i_inode *root_inode) +{ + int ret; + + ret = bch_inode_find_by_inum(c, BCACHE_ROOT_INO, root_inode); + if (ret && ret != -ENOENT) + return ret; + + if (fsck_err_on(ret, c, "root directory missing")) + goto create_root; + + if (fsck_err_on(!S_ISDIR(le16_to_cpu(root_inode->v.i_mode)), c, + "root inode not a directory")) + goto create_root; + + return 0; +fsck_err: + return ret; +create_root: + bch_inode_init(c, root_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); + root_inode->k.p.inode = BCACHE_ROOT_INO; + + return bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i, + NULL, NULL, NULL, 0); +} + +/* Get lost+found, create if it doesn't exist: */ +static int check_lostfound(struct cache_set *c, + struct bkey_i_inode *root_inode, + struct bkey_i_inode *lostfound_inode) +{ + struct qstr lostfound = QSTR("lost+found"); + struct bch_hash_info root_hash_info = bch_hash_info_init(&root_inode->v); + u64 inum; + int ret; + + inum = bch_dirent_lookup(c, BCACHE_ROOT_INO, &root_hash_info, + &lostfound); + if (!inum) { + bch_notice(c, "creating lost+found"); + goto create_lostfound; + } + + ret = bch_inode_find_by_inum(c, inum, lostfound_inode); + if (ret && ret != -ENOENT) + return ret; + + if (fsck_err_on(ret, c, "lost+found missing")) + goto create_lostfound; + + if (fsck_err_on(!S_ISDIR(le16_to_cpu(lostfound_inode->v.i_mode)), c, + "lost+found inode not a directory")) + goto create_lostfound; + + return 0; +fsck_err: + return ret; +create_lostfound: + le32_add_cpu(&root_inode->v.i_nlink, 1); + + ret = bch_btree_insert(c, BTREE_ID_INODES, &root_inode->k_i, + NULL, NULL, NULL, 0); + if (ret) + return ret; + + bch_inode_init(c, lostfound_inode, 0, 0, S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); + + ret = bch_inode_create(c, &lostfound_inode->k_i, BLOCKDEV_INODE_MAX, 0, + &c->unused_inode_hint); + if (ret) + return ret; + + ret = bch_dirent_create(c, BCACHE_ROOT_INO, &root_hash_info, DT_DIR, + &lostfound, lostfound_inode->k.p.inode, NULL, 0); + if (ret) + return ret; + + return 0; +} + +struct inode_bitmap { + unsigned long *bits; + size_t size; +}; + +static inline bool inode_bitmap_test(struct inode_bitmap *b, size_t nr) +{ + return nr < b->size ? test_bit(nr, b->bits) : false; +} + +static inline int inode_bitmap_set(struct inode_bitmap *b, size_t nr) +{ + if (nr >= b->size) { + size_t new_size = max(max(PAGE_SIZE * 8, + b->size * 2), + nr + 1); + void *n; + + new_size = roundup_pow_of_two(new_size); + n = krealloc(b->bits, new_size / 8, GFP_KERNEL|__GFP_ZERO); + if (!n) + return -ENOMEM; + + b->bits = n; + b->size = new_size; + } + + __set_bit(nr, b->bits); + return 0; +} + +struct pathbuf { + size_t nr; + size_t size; + + struct pathbuf_entry { + u64 inum; + u64 offset; + } *entries; +}; + +static int path_down(struct pathbuf *p, u64 inum) +{ + if (p->nr == p->size) { + size_t new_size = max(256UL, p->size * 2); + void *n = krealloc(p->entries, + new_size * sizeof(p->entries[0]), + GFP_KERNEL); + if (!n) + return -ENOMEM; + + p->entries = n; + p->size = new_size; + }; + + p->entries[p->nr++] = (struct pathbuf_entry) { + .inum = inum, + .offset = 0, + }; + return 0; +} + +noinline_for_stack +static int check_directory_structure(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) +{ + struct inode_bitmap dirs_done = { NULL, 0 }; + struct pathbuf path = { 0, 0, NULL }; + struct pathbuf_entry *e; + struct btree_iter iter; + struct bkey_s_c k; + struct bkey_s_c_dirent dirent; + bool had_unreachable; + u64 d_inum; + int ret = 0; + + /* DFS: */ +restart_dfs: + ret = inode_bitmap_set(&dirs_done, BCACHE_ROOT_INO); + if (ret) + goto err; + + ret = path_down(&path, BCACHE_ROOT_INO); + if (ret) + return ret; + + while (path.nr) { +next: + e = &path.entries[path.nr - 1]; + + if (e->offset == U64_MAX) + goto up; + + for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, + POS(e->inum, e->offset + 1), k) { + if (k.k->p.inode != e->inum) + break; + + e->offset = k.k->p.offset; + + if (k.k->type != BCH_DIRENT) + continue; + + dirent = bkey_s_c_to_dirent(k); + + if (dirent.v->d_type != DT_DIR) + continue; + + d_inum = le64_to_cpu(dirent.v->d_inum); + + if (fsck_err_on(inode_bitmap_test(&dirs_done, d_inum), c, + "directory with multiple hardlinks")) { + ret = remove_dirent(c, &iter, dirent); + if (ret) + goto err; + continue; + } + + ret = inode_bitmap_set(&dirs_done, d_inum); + if (ret) + goto err; + + ret = path_down(&path, d_inum); + if (ret) + goto err; + + bch_btree_iter_unlock(&iter); + goto next; + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + goto err; +up: + path.nr--; + } + + had_unreachable = false; + + for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, k) { + if (k.k->type != BCH_INODE_FS || + !S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->i_mode))) + continue; + + if (fsck_err_on(!inode_bitmap_test(&dirs_done, k.k->p.inode), c, + "unreachable directory found (inum %llu)", + k.k->p.inode)) { + bch_btree_iter_unlock(&iter); + + ret = reattach_inode(c, lostfound_inode, k.k->p.inode); + if (ret) + goto err; + + had_unreachable = true; + } + } + ret = bch_btree_iter_unlock(&iter); + if (ret) + goto err; + + if (had_unreachable) { + bch_info(c, "reattached unreachable directories, restarting pass to check for loops"); + kfree(dirs_done.bits); + kfree(path.entries); + memset(&dirs_done, 0, sizeof(dirs_done)); + memset(&path, 0, sizeof(path)); + goto restart_dfs; + } + +out: + kfree(dirs_done.bits); + kfree(path.entries); + return ret; +err: +fsck_err: + ret = bch_btree_iter_unlock(&iter) ?: ret; + goto out; +} + struct nlink { u32 count; u32 dir_count; @@ -40,11 +563,6 @@ static void inc_link(struct cache_set *c, struct nlinks *links, link->count++; } -/* - * XXX: should do a DFS (via filesystem heirarchy), and make sure all dirents - * are reachable - */ - noinline_for_stack static int bch_gc_walk_dirents(struct cache_set *c, struct nlinks *links, u64 range_start, u64 *range_end) @@ -99,7 +617,9 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum) return bch_btree_iter_unlock(&iter) ?: sectors; } -static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, +static int bch_gc_do_inode(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct btree_iter *iter, struct bkey_s_c_inode inode, struct nlink link) { u16 i_mode = le16_to_cpu(inode.v->i_mode); @@ -115,14 +635,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, inode.k->p.inode, i_nlink, link.count, mode_to_type(i_mode)); + /* These should have been caught/fixed by earlier passes: */ if (S_ISDIR(i_mode)) { - unfixable_fsck_err_on(link.count > 1, c, + need_fsck_err_on(link.count > 1, c, "directory %llu with multiple hardlinks: %u", inode.k->p.inode, link.count); real_i_nlink = link.count * 2 + link.dir_count; } else { - unfixable_fsck_err_on(link.dir_count, c, + need_fsck_err_on(link.dir_count, c, "found dirents for non directory %llu", inode.k->p.inode); @@ -135,11 +656,16 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter, "but found orphaned inode %llu", inode.k->p.inode); - unfixable_fsck_err_on(S_ISDIR(i_mode) && - bch_empty_dir(c, inode.k->p.inode), c, - "non empty directory with link count 0, " - "inode nlink %u, dir links found %u", - i_nlink, link.dir_count); + if (fsck_err_on(S_ISDIR(i_mode) && + bch_empty_dir(c, inode.k->p.inode), c, + "non empty directory with link count 0, " + "inode nlink %u, dir links found %u", + i_nlink, link.dir_count)) { + ret = reattach_inode(c, lostfound_inode, + inode.k->p.inode); + if (ret) + return ret; + } bch_verbose(c, "deleting inode %llu", inode.k->p.inode); @@ -235,7 +761,9 @@ fsck_err: } noinline_for_stack -static int bch_gc_walk_inodes(struct cache_set *c, struct nlinks *links, +static int bch_gc_walk_inodes(struct cache_set *c, + struct bkey_i_inode *lostfound_inode, + struct nlinks *links, u64 range_start, u64 range_end) { struct btree_iter iter; @@ -257,7 +785,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); nlinks_pos = range_start + nlinks_iter.pos; if (iter.pos.inode > nlinks_pos) { - unfixable_fsck_err_on(link && link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link && link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); genradix_iter_advance(&nlinks_iter, links); @@ -274,9 +803,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); */ bch_btree_iter_unlock(&iter); - ret = bch_gc_do_inode(c, &iter, - bkey_s_c_to_inode(k), - *link); + ret = bch_gc_do_inode(c, lostfound_inode, &iter, + bkey_s_c_to_inode(k), *link); if (ret == -EINTR) continue; if (ret) @@ -285,7 +813,8 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (link->count) atomic_long_inc(&c->nr_inodes); } else { - unfixable_fsck_err_on(link->count, c, + /* Should have been caught by dirents pass: */ + need_fsck_err_on(link->count, c, "missing inode %llu (nlink %u)", nlinks_pos, link->count); } @@ -304,7 +833,9 @@ fsck_err: return ret ?: ret2; } -int bch_gc_inode_nlinks(struct cache_set *c) +noinline_for_stack +static int check_inode_nlinks(struct cache_set *c, + struct bkey_i_inode *lostfound_inode) { struct nlinks links; u64 this_iter_range_start, next_iter_range_start = 0; @@ -322,7 +853,7 @@ int bch_gc_inode_nlinks(struct cache_set *c) if (ret) break; - ret = bch_gc_walk_inodes(c, &links, + ret = bch_gc_walk_inodes(c, lostfound_inode, &links, this_iter_range_start, next_iter_range_start); if (ret) @@ -336,140 +867,45 @@ int bch_gc_inode_nlinks(struct cache_set *c) return ret; } -static void next_inode(struct cache_set *c, u64 inum, u64 *cur_inum, - struct bkey_i_inode *inode, - bool *first_this_inode, bool *have_inode, - u64 *i_size, u16 *i_mode) -{ - *first_this_inode = inum != *cur_inum; - *cur_inum = inum; - - if (*first_this_inode) { - *have_inode = !bch_inode_find_by_inum(c, inum, inode); - - if (*have_inode) { - *i_mode = le16_to_cpu(inode->v.i_mode); - *i_size = le64_to_cpu(inode->v.i_size); - } - } -} - /* * Checks for inconsistencies that shouldn't happen, unless we have a bug. * Doesn't fix them yet, mainly because they haven't yet been observed: */ -int bch_fsck(struct cache_set *c) +int bch_fsck(struct cache_set *c, bool full_fsck) { - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_inode inode; - bool first_this_inode, have_inode; - u64 cur_inum, i_sectors; - u64 i_size = 0; - u16 i_mode = 0; - int ret = 0; - - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, - POS(BCACHE_ROOT_INO, 0), k) { - if (k.k->type == KEY_TYPE_DISCARD) - continue; - - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); - - unfixable_fsck_err_on(!have_inode, c, - "extent type %u for missing inode %llu", - k.k->type, k.k->p.inode); - - unfixable_fsck_err_on(first_this_inode && have_inode && - le64_to_cpu(inode.v.i_sectors) != - (i_sectors = bch_count_inode_sectors(c, cur_inum)), - c, "i_sectors wrong: got %llu, should be %llu", - le64_to_cpu(inode.v.i_sectors), i_sectors); - - unfixable_fsck_err_on(have_inode && - !S_ISREG(i_mode) && !S_ISLNK(i_mode), c, - "extent type %u for non regular file, inode %llu mode %o", - k.k->type, k.k->p.inode, i_mode); + struct bkey_i_inode root_inode, lostfound_inode; + int ret; - unfixable_fsck_err_on(k.k->type != BCH_RESERVATION && - k.k->p.offset > round_up(i_size, PAGE_SIZE) >> 9, c, - "extent type %u offset %llu past end of inode %llu, i_size %llu", - k.k->type, k.k->p.offset, k.k->p.inode, i_size); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_root(c, &root_inode); if (ret) return ret; - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, - POS(BCACHE_ROOT_INO, 0), k) { - struct bkey_s_c_dirent d; - struct bkey_i_inode target; - bool have_target; - u64 d_inum; - - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); - - unfixable_fsck_err_on(!have_inode, c, - "dirent in nonexisting directory %llu", - k.k->p.inode); - - unfixable_fsck_err_on(!S_ISDIR(i_mode), c, - "dirent in non directory inode %llu, type %u", - k.k->p.inode, mode_to_type(i_mode)); - - if (k.k->type != BCH_DIRENT) - continue; - - d = bkey_s_c_to_dirent(k); - d_inum = le64_to_cpu(d.v->d_inum); - - unfixable_fsck_err_on(d_inum == d.k->p.inode, c, - "dirent points to own directory"); + ret = check_lostfound(c, &root_inode, &lostfound_inode); + if (ret) + return ret; - have_target = !bch_inode_find_by_inum(c, d_inum, &target); + if (!full_fsck) + goto check_nlinks; - unfixable_fsck_err_on(!have_target, c, - "dirent points to missing inode %llu, type %u filename %s", - d_inum, d.v->d_type, d.v->d_name); + ret = check_extents(c); + if (ret) + return ret; - unfixable_fsck_err_on(have_target && - d.v->d_type != - mode_to_type(le16_to_cpu(target.v.i_mode)), c, - "incorrect d_type: got %u should be %u, filename %s", - d.v->d_type, - mode_to_type(le16_to_cpu(target.v.i_mode)), - d.v->d_name); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_dirents(c); if (ret) return ret; - cur_inum = -1; - have_inode = false; - for_each_btree_key(&iter, c, BTREE_ID_XATTRS, - POS(BCACHE_ROOT_INO, 0), k) { - next_inode(c, k.k->p.inode, &cur_inum, &inode, - &first_this_inode, &have_inode, - &i_size, &i_mode); + ret = check_xattrs(c); + if (ret) + return ret; - unfixable_fsck_err_on(!have_inode, c, - "xattr for missing inode %llu", - k.k->p.inode); - } - ret = bch_btree_iter_unlock(&iter); + ret = check_directory_structure(c, &lostfound_inode); + if (ret) + return ret; +check_nlinks: + ret = check_inode_nlinks(c, &lostfound_inode); if (ret) return ret; return 0; -fsck_err: - bch_btree_iter_unlock(&iter); - return ret; } diff --git a/libbcache/fs-gc.h b/libbcache/fs-gc.h index c44086c..ca6571a 100644 --- a/libbcache/fs-gc.h +++ b/libbcache/fs-gc.h @@ -2,7 +2,6 @@ #define _BCACHE_FS_GC_H s64 bch_count_inode_sectors(struct cache_set *, u64); -int bch_gc_inode_nlinks(struct cache_set *); -int bch_fsck(struct cache_set *); +int bch_fsck(struct cache_set *, bool); #endif /* _BCACHE_FS_GC_H */ diff --git a/libbcache/fs.c b/libbcache/fs.c index 1f01e48..884a950 100644 --- a/libbcache/fs.c +++ b/libbcache/fs.c @@ -26,7 +26,7 @@ static struct kmem_cache *bch_inode_cache; -static void bch_inode_init(struct bch_inode_info *, struct bkey_s_c_inode); +static void bch_vfs_inode_init(struct bch_inode_info *, struct bkey_s_c_inode); /* * I_SIZE_DIRTY requires special handling: @@ -175,7 +175,7 @@ static struct inode *bch_vfs_inode_get(struct super_block *sb, u64 inum) } ei = to_bch_ei(inode); - bch_inode_init(ei, bkey_s_c_to_inode(k)); + bch_vfs_inode_init(ei, bkey_s_c_to_inode(k)); ei->journal_seq = bch_inode_journal_seq(&c->journal, inum); @@ -193,10 +193,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, struct inode *inode; struct posix_acl *default_acl = NULL, *acl = NULL; struct bch_inode_info *ei; - struct bch_inode *bi; struct bkey_i_inode bkey_inode; - struct timespec ts = CURRENT_TIME; - s64 now = timespec_to_ns(&ts); int ret; inode = new_inode(parent->i_sb); @@ -213,19 +210,8 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, ei = to_bch_ei(inode); - bi = &bkey_inode_init(&bkey_inode.k_i)->v; - bi->i_uid = cpu_to_le32(i_uid_read(inode)); - bi->i_gid = cpu_to_le32(i_gid_read(inode)); - - bi->i_mode = cpu_to_le16(inode->i_mode); - bi->i_dev = cpu_to_le32(rdev); - bi->i_atime = cpu_to_le64(now); - bi->i_mtime = cpu_to_le64(now); - bi->i_ctime = cpu_to_le64(now); - bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1); - - get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed)); - SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type); + bch_inode_init(c, &bkey_inode, i_uid_read(inode), + i_gid_read(inode), inode->i_mode, rdev); ret = bch_inode_create(c, &bkey_inode.k_i, BLOCKDEV_INODE_MAX, 0, @@ -239,7 +225,7 @@ static struct inode *bch_vfs_inode_create(struct cache_set *c, goto err; } - bch_inode_init(ei, inode_i_to_s_c(&bkey_inode)); + bch_vfs_inode_init(ei, inode_i_to_s_c(&bkey_inode)); if (default_acl) { ret = bch_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); @@ -270,9 +256,13 @@ static int bch_vfs_dirent_create(struct cache_set *c, struct inode *dir, u8 type, const struct qstr *name, struct inode *dst) { + struct bch_inode_info *dir_ei = to_bch_ei(dir); int ret; - ret = bch_dirent_create(c, dir, type, name, dst->i_ino); + ret = bch_dirent_create(c, dir->i_ino, &dir_ei->str_hash, + type, name, dst->i_ino, + &dir_ei->journal_seq, + BCH_HASH_SET_MUST_CREATE); if (unlikely(ret)) return ret; @@ -317,10 +307,13 @@ static struct dentry *bch_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct cache_set *c = dir->i_sb->s_fs_info; + struct bch_inode_info *dir_ei = to_bch_ei(dir); struct inode *inode = NULL; u64 inum; - inum = bch_dirent_lookup(c, dir, &dentry->d_name); + inum = bch_dirent_lookup(c, dir->i_ino, + &dir_ei->str_hash, + &dentry->d_name); if (inum) inode = bch_vfs_inode_get(dir->i_sb, inum); @@ -374,7 +367,8 @@ static int bch_unlink(struct inode *dir, struct dentry *dentry) lockdep_assert_held(&inode->i_rwsem); - ret = bch_dirent_delete(c, dir, &dentry->d_name); + ret = bch_dirent_delete(c, dir->i_ino, &dir_ei->str_hash, + &dentry->d_name, &dir_ei->journal_seq); if (ret) return ret; @@ -1016,8 +1010,8 @@ static const struct address_space_operations bch_address_space_operations = { .error_remove_page = generic_error_remove_page, }; -static void bch_inode_init(struct bch_inode_info *ei, - struct bkey_s_c_inode bkey_inode) +static void bch_vfs_inode_init(struct bch_inode_info *ei, + struct bkey_s_c_inode bkey_inode) { struct inode *inode = &ei->vfs_inode; const struct bch_inode *bi = bkey_inode.v; @@ -1044,8 +1038,7 @@ static void bch_inode_init(struct bch_inode_info *ei, inode->i_ctime = ns_to_timespec(le64_to_cpu(bi->i_ctime)); bch_inode_flags_to_vfs(inode); - ei->str_hash.seed = le64_to_cpu(bi->i_hash_seed); - ei->str_hash.type = INODE_STR_HASH_TYPE(bi); + ei->str_hash = bch_hash_info_init(bi); inode->i_mapping->a_ops = &bch_address_space_operations; diff --git a/libbcache/inode.c b/libbcache/inode.c index d36de43..200deb0 100644 --- a/libbcache/inode.c +++ b/libbcache/inode.c @@ -7,6 +7,8 @@ #include "io.h" #include "keylist.h" +#include <linux/random.h> + ssize_t bch_inode_status(char *buf, size_t len, const struct bkey *k) { if (k->p.offset) @@ -105,6 +107,28 @@ const struct bkey_ops bch_bkey_inode_ops = { .val_to_text = bch_inode_to_text, }; +void bch_inode_init(struct cache_set *c, struct bkey_i_inode *inode, + uid_t uid, gid_t gid, umode_t mode, dev_t rdev) +{ + struct timespec ts = CURRENT_TIME; + s64 now = timespec_to_ns(&ts); + struct bch_inode *bi; + + bi = &bkey_inode_init(&inode->k_i)->v; + bi->i_uid = cpu_to_le32(uid); + bi->i_gid = cpu_to_le32(gid); + + bi->i_mode = cpu_to_le16(mode); + bi->i_dev = cpu_to_le32(rdev); + bi->i_atime = cpu_to_le64(now); + bi->i_mtime = cpu_to_le64(now); + bi->i_ctime = cpu_to_le64(now); + bi->i_nlink = cpu_to_le32(S_ISDIR(mode) ? 2 : 1); + + get_random_bytes(&bi->i_hash_seed, sizeof(bi->i_hash_seed)); + SET_INODE_STR_HASH_TYPE(bi, c->sb.str_hash_type); +} + int bch_inode_create(struct cache_set *c, struct bkey_i *inode, u64 min, u64 max, u64 *hint) { @@ -228,15 +252,14 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, { struct btree_iter iter; struct bkey_s_c k; - int ret = -ENOENT; for_each_btree_key_with_holes(&iter, c, BTREE_ID_INODES, POS(inode_nr, 0), k) { switch (k.k->type) { case BCH_INODE_FS: - ret = 0; bkey_reassemble(&inode->k_i, k); - break; + bch_btree_iter_unlock(&iter); + return 0; default: /* hole, not found */ break; @@ -245,9 +268,8 @@ int bch_inode_find_by_inum(struct cache_set *c, u64 inode_nr, break; } - bch_btree_iter_unlock(&iter); - return ret; + return bch_btree_iter_unlock(&iter) ?: -ENOENT; } int bch_cached_dev_inode_find_by_uuid(struct cache_set *c, uuid_le *uuid, diff --git a/libbcache/inode.h b/libbcache/inode.h index d8b28c7..fa1a4cf 100644 --- a/libbcache/inode.h +++ b/libbcache/inode.h @@ -5,6 +5,8 @@ extern const struct bkey_ops bch_bkey_inode_ops; ssize_t bch_inode_status(char *, size_t, const struct bkey *); +void bch_inode_init(struct cache_set *, struct bkey_i_inode *, + uid_t, gid_t, umode_t, dev_t); int bch_inode_create(struct cache_set *, struct bkey_i *, u64, u64, u64 *); int bch_inode_truncate(struct cache_set *, u64, u64, struct extent_insert_hook *, u64 *); diff --git a/libbcache/io.c b/libbcache/io.c index 7219b65..4112ea5 100644 --- a/libbcache/io.c +++ b/libbcache/io.c @@ -140,6 +140,8 @@ void bch_submit_wbio_replicas(struct bch_write_bio *wbio, struct cache_set *c, struct bch_write_bio *n; struct cache *ca; + BUG_ON(c->opts.nochanges); + wbio->split = false; wbio->c = c; @@ -738,7 +740,8 @@ void bch_write(struct closure *cl) !(op->flags & BCH_WRITE_CACHED), op->flags & BCH_WRITE_DISCARD); - if (!percpu_ref_tryget(&c->writes)) { + if (c->opts.nochanges || + !percpu_ref_tryget(&c->writes)) { __bcache_io_error(c, "read only"); op->error = -EROFS; bch_disk_reservation_put(c, &op->res); diff --git a/libbcache/journal.c b/libbcache/journal.c index ffc9573..9e09b86 100644 --- a/libbcache/journal.c +++ b/libbcache/journal.c @@ -478,14 +478,14 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, char buf[160]; int ret = 0; - if (fsck_err_on(!k->k.u64s, c, + if (mustfix_fsck_err_on(!k->k.u64s, c, "invalid %s in journal: k->u64s 0", type)) { entry->u64s = cpu_to_le16((u64 *) k - entry->_data); journal_entry_null_range(jset_keys_next(entry), next); return 0; } - if (fsck_err_on((void *) bkey_next(k) > + if (mustfix_fsck_err_on((void *) bkey_next(k) > (void *) jset_keys_next(entry), c, "invalid %s in journal: extends past end of journal entry", type)) { @@ -494,7 +494,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, return 0; } - if (fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, + if (mustfix_fsck_err_on(k->k.format != KEY_FORMAT_CURRENT, c, "invalid %s in journal: bad format %u", type, k->k.format)) { le16_add_cpu(&entry->u64s, -k->k.u64s); @@ -510,7 +510,7 @@ static int journal_validate_key(struct cache_set *c, struct jset *j, if (invalid) { bch_bkey_val_to_text(c, key_type, buf, sizeof(buf), bkey_i_to_s_c(k)); - fsck_err(c, "invalid %s in journal: %s", type, buf); + mustfix_fsck_err(c, "invalid %s in journal: %s", type, buf); le16_add_cpu(&entry->u64s, -k->k.u64s); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); @@ -543,7 +543,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return BCH_FSCK_UNKNOWN_VERSION; } - if (fsck_err_on(bytes > bucket_sectors_left << 9 || + if (mustfix_fsck_err_on(bytes > bucket_sectors_left << 9 || bytes > c->journal.entry_size_max, c, "journal entry too big (%zu bytes), sector %lluu", bytes, sector)) { @@ -556,7 +556,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto got = le64_to_cpu(j->csum); expect = __csum_set(j, le32_to_cpu(j->u64s), JSET_CSUM_TYPE(j)); - if (fsck_err_on(got != expect, c, + if (mustfix_fsck_err_on(got != expect, c, "journal checksum bad (got %llu expect %llu), sector %lluu", got, expect, sector)) { /* XXX: retry IO, when we start retrying checksum errors */ @@ -564,14 +564,14 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto return JOURNAL_ENTRY_BAD; } - if (fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), c, - "invalid journal entry: last_seq > seq")) + if (mustfix_fsck_err_on(le64_to_cpu(j->last_seq) > le64_to_cpu(j->seq), + c, "invalid journal entry: last_seq > seq")) j->last_seq = j->seq; for_each_jset_entry(entry, j) { struct bkey_i *k; - if (fsck_err_on(jset_keys_next(entry) > + if (mustfix_fsck_err_on(jset_keys_next(entry) > bkey_idx(j, le32_to_cpu(j->u64s)), c, "journal entry extents past end of jset")) { j->u64s = cpu_to_le64((u64 *) entry - j->_data); @@ -595,7 +595,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto case JOURNAL_ENTRY_BTREE_ROOT: k = entry->start; - if (fsck_err_on(!entry->u64s || + if (mustfix_fsck_err_on(!entry->u64s || le16_to_cpu(entry->u64s) != k->k.u64s, c, "invalid btree root journal entry: wrong number of keys")) { journal_entry_null_range(entry, @@ -613,7 +613,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; case JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED: - if (fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, + if (mustfix_fsck_err_on(le16_to_cpu(entry->u64s) != 1, c, "invalid journal seq blacklist entry: bad size")) { journal_entry_null_range(entry, jset_keys_next(entry)); @@ -621,7 +621,7 @@ static int journal_entry_validate(struct cache_set *c, struct jset *j, u64 secto break; default: - fsck_err(c, "invalid journal entry type %llu", + mustfix_fsck_err(c, "invalid journal entry type %llu", JOURNAL_ENTRY_TYPE(entry)); journal_entry_null_range(entry, jset_keys_next(entry)); break; @@ -2065,6 +2065,13 @@ static void journal_write(struct closure *cl) bch_check_mark_super(c, &j->key, true); + /* + * XXX: we really should just disable the entire journal in nochanges + * mode + */ + if (c->opts.nochanges) + goto no_io; + extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) { rcu_read_lock(); ca = PTR_CACHE(c, ptr); @@ -2094,8 +2101,6 @@ static void journal_write(struct closure *cl) trace_bcache_journal_write(bio); closure_bio_submit_punt(bio, cl, c); - ptr->offset += sectors; - ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); } @@ -2114,6 +2119,10 @@ static void journal_write(struct closure *cl) closure_bio_submit_punt(bio, cl, c); } +no_io: + extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) + ptr->offset += sectors; + closure_return_with_destructor(cl, journal_write_done); } diff --git a/libbcache/movinggc.c b/libbcache/movinggc.c index 3c85d49..cb4f165 100644 --- a/libbcache/movinggc.c +++ b/libbcache/movinggc.c @@ -26,14 +26,11 @@ static const struct bch_extent_ptr *moving_pred(struct cache *ca, { const struct bch_extent_ptr *ptr; - if (bkey_extent_is_data(k.k)) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) - if ((ca->sb.nr_this_dev == ptr->dev) && - PTR_BUCKET(ca, ptr)->mark.copygc) - return ptr; - } + if (bkey_extent_is_data(k.k) && + (ptr = bch_extent_has_device(bkey_s_c_to_extent(k), + ca->sb.nr_this_dev)) && + PTR_BUCKET(ca, ptr)->mark.copygc) + return ptr; return NULL; } @@ -274,6 +271,9 @@ int bch_moving_gc_thread_start(struct cache *ca) /* The moving gc read thread must be stopped */ BUG_ON(ca->moving_gc_read != NULL); + if (ca->set->opts.nochanges) + return 0; + if (cache_set_init_fault("moving_gc_start")) return -ENOMEM; diff --git a/libbcache/opts.c b/libbcache/opts.c index 249dd5d..60a2a4d 100644 --- a/libbcache/opts.c +++ b/libbcache/opts.c @@ -4,16 +4,6 @@ #include "opts.h" #include "util.h" -const char * const bch_bool_opt[] = { - "0", - "1", - NULL -}; - -const char * const bch_uint_opt[] = { - NULL -}; - const char * const bch_error_actions[] = { "continue", "remount-ro", @@ -43,6 +33,42 @@ const char * const bch_str_hash_types[] = { NULL }; +const char * const bch_cache_replacement_policies[] = { + "lru", + "fifo", + "random", + NULL +}; + +/* Default is -1; we skip past it for struct cached_dev's cache mode */ +const char * const bch_cache_modes[] = { + "default", + "writethrough", + "writeback", + "writearound", + "none", + NULL +}; + +const char * const bch_cache_state[] = { + "active", + "readonly", + "failed", + "spare", + NULL +}; + + +const char * const bch_bool_opt[] = { + "0", + "1", + NULL +}; + +const char * const bch_uint_opt[] = { + NULL +}; + enum bch_opts { #define CACHE_SET_OPT(_name, _choices, _min, _max, _sb_opt, _perm) \ Opt_##_name, diff --git a/libbcache/opts.h b/libbcache/opts.h index 1d19ac6..70df232 100644 --- a/libbcache/opts.h +++ b/libbcache/opts.h @@ -6,6 +6,14 @@ #include <linux/log2.h> #include <linux/string.h> +extern const char * const bch_error_actions[]; +extern const char * const bch_csum_types[]; +extern const char * const bch_compression_types[]; +extern const char * const bch_str_hash_types[]; +extern const char * const bch_cache_replacement_policies[]; +extern const char * const bch_cache_modes[]; +extern const char * const bch_cache_state[]; + /* * Mount options; we also store defaults in the superblock. * @@ -20,10 +28,6 @@ extern const char * const bch_bool_opt[]; extern const char * const bch_uint_opt[]; -extern const char * const bch_error_actions[]; -extern const char * const bch_csum_types[]; -extern const char * const bch_compression_types[]; -extern const char * const bch_str_hash_types[]; /* dummy option, for options that aren't stored in the superblock */ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0); @@ -44,6 +48,15 @@ LE64_BITMASK(NO_SB_OPT, struct cache_sb, flags, 0, 0); CACHE_SET_OPT(fix_errors, \ bch_bool_opt, 0, 2, \ NO_SB_OPT, true) \ + CACHE_SET_OPT(nochanges, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ + CACHE_SET_OPT(noreplay, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ + CACHE_SET_OPT(norecovery, \ + bch_bool_opt, 0, 2, \ + NO_SB_OPT, 0) \ CACHE_SET_SB_OPTS() #define CACHE_SET_OPTS() \ diff --git a/libbcache/str_hash.h b/libbcache/str_hash.h index 9a718a8..a489304 100644 --- a/libbcache/str_hash.h +++ b/libbcache/str_hash.h @@ -79,6 +79,14 @@ struct bch_hash_info { u8 type; }; +static inline struct bch_hash_info bch_hash_info_init(const struct bch_inode *bi) +{ + return (struct bch_hash_info) { + .seed = le64_to_cpu(bi->i_hash_seed), + .type = INODE_STR_HASH_TYPE(bi), + }; +} + struct bch_hash_desc { enum btree_id btree_id; u8 key_type; diff --git a/libbcache/super.c b/libbcache/super.c index 5f6a85e..296700b 100644 --- a/libbcache/super.c +++ b/libbcache/super.c @@ -99,14 +99,17 @@ static bool bch_is_open(struct block_device *bdev) } static const char *bch_blkdev_open(const char *path, void *holder, + struct cache_set_opts opts, struct block_device **ret) { struct block_device *bdev; + fmode_t mode = opts.nochanges > 0 + ? FMODE_READ + : FMODE_READ|FMODE_WRITE|FMODE_EXCL; const char *err; *ret = NULL; - bdev = blkdev_get_by_path(path, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - holder); + bdev = blkdev_get_by_path(path, mode, holder); if (bdev == ERR_PTR(-EBUSY)) { bdev = lookup_bdev(path); @@ -369,6 +372,7 @@ int bch_super_realloc(struct bcache_superblock *sb, unsigned u64s) } static const char *read_super(struct bcache_superblock *sb, + struct cache_set_opts opts, const char *path) { const char *err; @@ -378,7 +382,7 @@ static const char *read_super(struct bcache_superblock *sb, memset(sb, 0, sizeof(*sb)); - err = bch_blkdev_open(path, &sb, &sb->bdev); + err = bch_blkdev_open(path, &sb, opts, &sb->bdev); if (err) return err; retry: @@ -614,6 +618,9 @@ static void __bcache_write_super(struct cache_set *c) closure_init(cl, &c->cl); + if (c->opts.nochanges) + goto no_io; + le64_add_cpu(&c->disk_sb.seq, 1); for_each_cache(ca, c, i) { @@ -636,7 +643,7 @@ static void __bcache_write_super(struct cache_set *c) percpu_ref_get(&ca->ref); __write_super(c, &ca->disk_sb); } - +no_io: closure_return_with_destructor(cl, bcache_write_super_unlock); } @@ -1147,6 +1154,9 @@ static struct cache_set *bch_cache_set_alloc(struct cache_sb *sb, c->opts = cache_superblock_opts(sb); cache_set_opts_apply(&c->opts, opts); + c->opts.nochanges |= c->opts.noreplay; + c->opts.read_only |= c->opts.nochanges; + c->block_bits = ilog2(c->sb.block_size); if (cache_set_init_fault("cache_set_alloc")) @@ -1339,6 +1349,9 @@ static const char *run_cache_set(struct cache_set *c) if (bch_initial_gc(c, &journal)) goto err; + if (c->opts.noreplay) + goto recovery_done; + bch_verbose(c, "mark and sweep done"); /* @@ -1365,6 +1378,9 @@ static const char *run_cache_set(struct cache_set *c) bch_verbose(c, "journal replay done"); + if (c->opts.norecovery) + goto recovery_done; + /* * Write a new journal entry _before_ we start journalling new * data - otherwise, we could end up with btree node bsets with @@ -1376,21 +1392,12 @@ static const char *run_cache_set(struct cache_set *c) if (bch_journal_meta(&c->journal)) goto err; - bch_verbose(c, "starting fs gc:"); - err = "error in fs gc"; - ret = bch_gc_inode_nlinks(c); + bch_verbose(c, "starting fsck:"); + err = "error in fsck"; + ret = bch_fsck(c, !c->opts.nofsck); if (ret) goto err; - bch_verbose(c, "fs gc done"); - - if (!c->opts.nofsck) { - bch_verbose(c, "starting fsck:"); - err = "error in fsck"; - ret = bch_fsck(c); - if (ret) - goto err; - bch_verbose(c, "fsck done"); - } + bch_verbose(c, "fsck done"); } else { struct bkey_i_inode inode; struct closure cl; @@ -1433,12 +1440,9 @@ static const char *run_cache_set(struct cache_set *c) /* Wait for new btree roots to be written: */ closure_sync(&cl); - bkey_inode_init(&inode.k_i); + bch_inode_init(c, &inode, 0, 0, + S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0); inode.k.p.inode = BCACHE_ROOT_INO; - inode.v.i_mode = cpu_to_le16(S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO); - inode.v.i_nlink = cpu_to_le32(2); - get_random_bytes(&inode.v.i_hash_seed, sizeof(inode.v.i_hash_seed)); - SET_INODE_STR_HASH_TYPE(&inode.v, c->sb.str_hash_type); err = "error creating root directory"; if (bch_btree_insert(c, BTREE_ID_INODES, &inode.k_i, @@ -1449,7 +1453,7 @@ static const char *run_cache_set(struct cache_set *c) if (bch_journal_meta(&c->journal)) goto err; } - +recovery_done: if (c->opts.read_only) { bch_cache_set_read_only_sync(c); } else { @@ -1485,12 +1489,12 @@ static const char *run_cache_set(struct cache_set *c) set_bit(CACHE_SET_RUNNING, &c->flags); bch_attach_backing_devs(c); - closure_put(&c->caching); - bch_notify_cache_set_read_write(c); - - BUG_ON(!list_empty(&journal)); - return NULL; + err = NULL; +out: + bch_journal_entries_free(&journal); + closure_put(&c->caching); + return err; err: switch (ret) { case BCH_FSCK_ERRORS_NOT_FIXED: @@ -1519,12 +1523,8 @@ err: } BUG_ON(!err); - - bch_journal_entries_free(&journal); set_bit(CACHE_SET_ERROR, &c->flags); - bch_cache_set_unregister(c); - closure_put(&c->caching); - return err; + goto out; } static const char *can_add_cache(struct cache_sb *sb, @@ -2056,8 +2056,9 @@ static const char *register_cache(struct bcache_superblock *sb, struct cache_set_opts opts) { char name[BDEVNAME_SIZE]; - const char *err = "cannot allocate memory"; + const char *err; struct cache_set *c; + bool allocated_cache_set = false; err = validate_cache_super(sb); if (err) @@ -2067,41 +2068,36 @@ static const char *register_cache(struct bcache_superblock *sb, c = cache_set_lookup(sb->sb->set_uuid); if (c) { - if ((err = (can_attach_cache(sb->sb, c) ?: - cache_alloc(sb, c, NULL)))) + err = can_attach_cache(sb->sb, c); + if (err) return err; + } else { + c = bch_cache_set_alloc(sb->sb, opts); + if (!c) + return "cannot allocate memory"; - if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) { - err = run_cache_set(c); - if (err) - return err; - } - goto out; + allocated_cache_set = true; } - c = bch_cache_set_alloc(sb->sb, opts); - if (!c) - return err; - err = cache_alloc(sb, c, NULL); if (err) - goto err_stop; + goto err; if (cache_set_nr_online_devices(c) == cache_set_nr_devices(c)) { err = run_cache_set(c); if (err) - goto err_stop; + goto err; + } else { + err = "error creating kobject"; + if (bch_cache_set_online(c)) + goto err; } - err = "error creating kobject"; - if (bch_cache_set_online(c)) - goto err_stop; -out: - bch_info(c, "started"); return NULL; -err_stop: - bch_cache_set_stop(c); +err: + if (allocated_cache_set) + bch_cache_set_stop(c); return err; } @@ -2117,7 +2113,7 @@ int bch_cache_set_add_cache(struct cache_set *c, const char *path) mutex_lock(&bch_register_lock); - err = read_super(&sb, path); + err = read_super(&sb, c->opts, path); if (err) goto err_unlock; @@ -2261,7 +2257,7 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, mutex_lock(&bch_register_lock); for (i = 0; i < nr_devices; i++) { - err = read_super(&sb[i], devices[i]); + err = read_super(&sb[i], opts, devices[i]); if (err) goto err_unlock; @@ -2312,6 +2308,8 @@ const char *bch_register_cache_set(char * const *devices, unsigned nr_devices, out: kfree(sb); module_put(THIS_MODULE); + if (err) + c = NULL; return err; err_unlock: if (c) @@ -2326,18 +2324,19 @@ err: const char *bch_register_one(const char *path) { struct bcache_superblock sb; + struct cache_set_opts opts = cache_set_opts_empty(); const char *err; mutex_lock(&bch_register_lock); - err = read_super(&sb, path); + err = read_super(&sb, opts, path); if (err) goto err; if (__SB_IS_BDEV(le64_to_cpu(sb.sb->version))) err = bch_backing_dev_register(&sb); else - err = register_cache(&sb, cache_set_opts_empty()); + err = register_cache(&sb, opts); free_super(&sb); err: diff --git a/libbcache/sysfs.c b/libbcache/sysfs.c index 40d006b..58a7125 100644 --- a/libbcache/sysfs.c +++ b/libbcache/sysfs.c @@ -24,31 +24,6 @@ #include <linux/blkdev.h> #include <linux/sort.h> -static const char * const cache_replacement_policies[] = { - "lru", - "fifo", - "random", - NULL -}; - -/* Default is -1; we skip past it for struct cached_dev's cache mode */ -static const char * const bch_cache_modes[] = { - "default", - "writethrough", - "writeback", - "writearound", - "none", - NULL -}; - -static const char * const bch_cache_state[] = { - "active", - "readonly", - "failed", - "spare", - NULL -}; - write_attribute(attach); write_attribute(detach); write_attribute(unregister); @@ -1237,7 +1212,7 @@ SHOW(bch_cache) if (attr == &sysfs_cache_replacement_policy) return bch_snprint_string_list(buf, PAGE_SIZE, - cache_replacement_policies, + bch_cache_replacement_policies, ca->mi.replacement); sysfs_print(tier, ca->mi.tier); @@ -1281,7 +1256,7 @@ STORE(__bch_cache) } if (attr == &sysfs_cache_replacement_policy) { - ssize_t v = bch_read_string_list(buf, cache_replacement_policies); + ssize_t v = bch_read_string_list(buf, bch_cache_replacement_policies); if (v < 0) return v; diff --git a/libbcache/tier.c b/libbcache/tier.c index 2b568e1..39b04f7 100644 --- a/libbcache/tier.c +++ b/libbcache/tier.c @@ -224,6 +224,9 @@ int bch_tiering_read_start(struct cache_set *c) { struct task_struct *t; + if (c->opts.nochanges) + return 0; + t = kthread_create(bch_tiering_thread, c, "bch_tier_read"); if (IS_ERR(t)) return PTR_ERR(t); |