diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2016-07-21 19:05:06 -0800 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2016-07-30 01:13:53 -0800 |
commit | ea331bf0715c6c9d4d91e0e83127bd30aa5e5969 (patch) | |
tree | 4e0ff677e2f0ecce3daac21819054332494a9f1f | |
parent | 942a8988055dafee337a3d138e1d6190d36a4a63 (diff) |
bcache: lift restrictions on 0 size extents
-rw-r--r-- | drivers/md/bcache/bset.c | 124 | ||||
-rw-r--r-- | drivers/md/bcache/bset.h | 18 | ||||
-rw-r--r-- | drivers/md/bcache/btree_io.c | 2 | ||||
-rw-r--r-- | drivers/md/bcache/extents.c | 195 |
4 files changed, 68 insertions, 271 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index cae3b1f02f41..594f54380c9f 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -20,8 +20,10 @@ #include "alloc_types.h" #include <trace/events/bcache.h> -static inline bool btree_node_iter_set_cmp(struct btree_node_iter *, - struct btree_keys *, +static inline int btree_node_iter_cmp(struct btree_keys *, + const struct bkey_packed *, + const struct bkey_packed *); +static inline bool btree_node_iter_set_cmp(struct btree_keys *, struct btree_node_iter_set, struct btree_node_iter_set); @@ -53,20 +55,6 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *b, struct bkey_packed *k) * by the time we actually do the insert will all be deleted. */ -static bool keys_out_of_order(const struct bkey_format *f, - const struct bkey_packed *prev, - const struct bkey_packed *next, - bool is_extents) -{ - struct bkey nextu = bkey_unpack_key(f, next); - - return bkey_cmp_left_packed(f, prev, bkey_start_pos(&nextu)) > 0 || - ((is_extents - ? !bkey_deleted(next) - : !bkey_deleted(prev)) && - !bkey_cmp_packed(f, prev, next)); -} - #ifdef CONFIG_BCACHE_DEBUG void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) @@ -155,7 +143,7 @@ static void bch_btree_node_iter_next_check(struct btree_node_iter *iter, bkey_unpack_key(f, k); if (n && - keys_out_of_order(f, k, n, iter->is_extents)) { + btree_node_iter_cmp(b, k, n) > 0) { struct bkey ku = bkey_unpack_key(f, k); struct bkey nu = bkey_unpack_key(f, n); char buf1[80], buf2[80]; @@ -177,7 +165,7 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter, btree_node_iter_for_each(iter, set) { BUG_ON(set + 1 < iter->data + MAX_BSETS && set[1].k != set[1].end && - btree_node_iter_set_cmp(iter, b, set[0], set[1])); + btree_node_iter_set_cmp(b, set[0], set[1])); k = __btree_node_offset_to_key(b, set->k); t = bch_bkey_to_bset(b, k); @@ -188,53 +176,37 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter, } void bch_verify_key_order(struct btree_keys *b, - struct btree_node_iter *iter, + struct btree_node_iter *_iter, struct bkey_packed *where) { const struct bkey_format *f = &b->format; + struct btree_node_iter iter = *_iter; struct bset_tree *t = bch_bkey_to_bset(b, where); - struct bkey_packed *k, *prev; + struct bkey_packed *k; struct bkey uk, uw = bkey_unpack_key(f, where); k = bkey_prev(t, where); BUG_ON(k && - keys_out_of_order(f, k, where, b->ops->is_extents)); + btree_node_iter_cmp(b, k, where) > 0); k = bkey_next(where); BUG_ON(k != bset_bkey_last(t->data) && - keys_out_of_order(f, where, k, b->ops->is_extents)); - - for (t = b->set; t <= b->set + b->nsets; t++) { - if (!t->data->u64s) - continue; - - if (where >= t->data->start && - where < bset_bkey_last(t->data)) - continue; - - k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?: - bkey_prev(t, bset_bkey_last(t->data)); + btree_node_iter_cmp(b, where, k) > 0); - while (bkey_cmp_left_packed(f, k, bkey_start_pos(&uw)) > 0 && - (prev = bkey_prev(t, k))) - k = prev; + if (b->ops->is_extents) { + do { + k = bch_btree_node_iter_prev_all(&iter, b); + } while (k && bkey_deleted(k)); - for (; - k != bset_bkey_last(t->data); - k = bkey_next(k)) { - uk = bkey_unpack_key(f, k); + BUG_ON(k && (uk = bkey_unpack_key(f, k), + bkey_cmp(uk.p, bkey_start_pos(&uw)) > 0)); - if (b->ops->is_extents) { - BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 || - bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0)); - } else { - BUG_ON(!bkey_cmp(uw.p, uk.p) && - !bkey_deleted(&uk)); - } + iter = *_iter; + bch_btree_node_iter_advance(&iter, b); + k = bch_btree_node_iter_peek(&iter, b); - if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0) - break; - } + BUG_ON(k && (uk = bkey_unpack_key(f, k), + bkey_cmp(uw.p, bkey_start_pos(&uk)) > 0)); } } @@ -1230,44 +1202,28 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b, /* Btree node iterator */ -static inline bool btree_node_iter_cmp(struct btree_keys *b, - bool is_extents, - struct bkey_packed *l, - struct bkey_packed *r) +static inline int btree_node_iter_cmp(struct btree_keys *b, + const struct bkey_packed *l, + const struct bkey_packed *r) { - s64 c = bkey_cmp_packed(&b->format, l, r); - - /* - * For non extents, when keys compare equal the deleted keys have to - * come first - so that bch_btree_node_iter_next_check() can detect - * duplicate nondeleted keys (and possibly other reasons?) - * - * For extents, bkey_deleted() is used as a proxy for k->size == 0, so - * deleted keys have to sort last. - */ - return c ? c > 0 - : is_extents - ? bkey_deleted(l) > bkey_deleted(r) - : bkey_deleted(l) < bkey_deleted(r); + /* When keys compare equal the deleted keys come first: */ + return bkey_cmp_packed(&b->format, l, r) ?: + (int) bkey_deleted(r) - (int) bkey_deleted(l); } /* * Returns true if l > r: */ -static inline bool btree_node_iter_set_cmp(struct btree_node_iter *iter, - struct btree_keys *b, +static inline bool btree_node_iter_set_cmp(struct btree_keys *b, struct btree_node_iter_set l, struct btree_node_iter_set r) { - return btree_node_iter_cmp(b, iter->is_extents, + return btree_node_iter_cmp(b, __btree_node_offset_to_key(b, l.k), - __btree_node_offset_to_key(b, r.k)); + __btree_node_offset_to_key(b, r.k)) > 0; } -#define btree_node_iter_cmp_heap(_l, _r) \ - (btree_node_iter_cmp(b, (iter)->is_extents, \ - __btree_node_offset_to_key(b, (_l).k), \ - __btree_node_offset_to_key(b, (_r).k))) +#define btree_node_iter_cmp_heap(_l, _r) btree_node_iter_set_cmp(b, _l, _r) void bch_btree_node_iter_push(struct btree_node_iter *iter, struct btree_keys *b, @@ -1285,7 +1241,7 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter, }); btree_node_iter_for_each(iter, pos) - if (!btree_node_iter_set_cmp(iter, b, n, *pos)) + if (!btree_node_iter_set_cmp(b, n, *pos)) break; EBUG_ON(pos >= iter->data + MAX_BSETS); @@ -1320,8 +1276,8 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter, * This works out ok, but it's something to be aware of: * * - For non extents, we guarantee that the live key comes last - see - * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't - * see will only be deleted keys you don't care about. + * btree_node_iter_cmp(). So the duplicates you don't see will only be + * deleted keys you don't care about. * * - For extents, deleted keys sort last (see the comment at the top of this * file). But when you're searching for extents, you actually want the first @@ -1364,7 +1320,7 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter, BUG(); } - __bch_btree_node_iter_init(iter, b); + memset(iter, 0, sizeof(*iter)); for (t = b->set; t <= b->set + b->nsets; t++) bch_btree_node_iter_push(iter, b, @@ -1381,7 +1337,7 @@ void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter, { struct bset_tree *t; - __bch_btree_node_iter_init(iter, b); + memset(iter, 0, sizeof(*iter)); for (t = b->set; t <= b->set + b->nsets; t++) bch_btree_node_iter_push(iter, b, @@ -1413,7 +1369,7 @@ static inline void btree_node_iter_sift(struct btree_node_iter *iter, for (i = start; i + 1 < MAX_BSETS && iter->data[i + 1].k != iter->data[i + 1].end && - btree_node_iter_set_cmp(iter, b, iter->data[i], iter->data[i + 1]); + btree_node_iter_set_cmp(b, iter->data[i], iter->data[i + 1]); i++) swap(iter->data[i], iter->data[i + 1]); } @@ -1484,7 +1440,7 @@ found: if (k && (!prev || - btree_node_iter_cmp(b, iter->is_extents, k, prev))) { + btree_node_iter_cmp(b, prev, k) < 0)) { prev = k; prev_set = i; prev_t = t; @@ -1716,7 +1672,7 @@ struct btree_nr_keys bch_sort_bsets(struct bset *dst, struct btree_keys *b, struct bset_tree *t; iter = &_iter; - __bch_btree_node_iter_large_init(iter, b); + iter->used = 0; for (t = b->set + from; t <= b->set + b->nsets; t++) bch_btree_node_iter_large_push(iter, b, diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index df13473506ab..1dd1fc12b51a 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -442,20 +442,11 @@ static inline enum bch_extent_overlap bch_extent_overlap(const struct bkey *k, /* Btree key iteration */ struct btree_node_iter { - u8 is_extents; - struct btree_node_iter_set { u16 k, end; } data[MAX_BSETS]; }; -static inline void __bch_btree_node_iter_init(struct btree_node_iter *iter, - struct btree_keys *b) -{ - iter->is_extents = b->ops->is_extents; - memset(iter->data, 0, sizeof(iter->data)); -} - void bch_btree_node_iter_push(struct btree_node_iter *, struct btree_keys *, const struct bkey_packed *, const struct bkey_packed *); @@ -559,20 +550,11 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *, /* Sorting */ struct btree_node_iter_large { - u8 is_extents; u16 used; struct btree_node_iter_set data[MAX_BSETS]; }; -static inline void -__bch_btree_node_iter_large_init(struct btree_node_iter_large *iter, - struct btree_keys *b) -{ - iter->used = 0; - iter->is_extents = b->ops->is_extents; -} - void bch_btree_node_iter_large_advance(struct btree_node_iter_large *, struct btree_keys *); diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c index 2e362003a44d..f90fc417aed1 100644 --- a/drivers/md/bcache/btree_io.c +++ b/drivers/md/bcache/btree_io.c @@ -267,7 +267,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b, int ret; iter = mempool_alloc(&c->fill_iter, GFP_NOIO); - __bch_btree_node_iter_large_init(iter, &b->keys); + iter->used = 0; err = "dynamic fault"; if (bch_meta_read_fault("btree")) diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 10ce62fb673b..951850c0eb27 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -1254,6 +1254,11 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans, EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k))); EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0); + if (!k.k->size) { + bch_btree_node_iter_advance(node_iter, &b->keys); + continue; + } + if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0) break; @@ -1274,27 +1279,21 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans, } } - /* - * Only call advance pos & call hook for nonzero size extents: - * If hook returned BTREE_HOOK_NO_INSERT, @insert->k no longer - * overlaps with @k: - */ - if (k.k->size) - switch (extent_insert_advance_pos(trans, insert, hook, - &committed_pos, - k.s_c, res, flags, - &stats)) { - case BTREE_HOOK_DO_INSERT: - break; - case BTREE_HOOK_NO_INSERT: - continue; - case BTREE_HOOK_RESTART_TRANS: - ret = BTREE_INSERT_NEED_TRAVERSE; - goto stop; - } + switch (extent_insert_advance_pos(trans, insert, hook, + &committed_pos, + k.s_c, res, flags, + &stats)) { + case BTREE_HOOK_DO_INSERT: + break; + case BTREE_HOOK_NO_INSERT: + continue; + case BTREE_HOOK_RESTART_TRANS: + ret = BTREE_INSERT_NEED_TRAVERSE; + goto stop; + } /* k is the key currently in the tree, 'insert' is the new key */ - switch (bch_extent_overlap(&insert->k->k, k.k)) { + switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: /* insert overlaps with start of k: */ bch_cut_subtract_front(iter, insert->k->k.p, k, &stats); @@ -1319,49 +1318,15 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans, bch_btree_node_iter_fix(iter, b, node_iter, _k, true); break; - case BCH_EXTENT_OVERLAP_ALL: { - struct bpos orig_pos = k.k->p; - + case BCH_EXTENT_OVERLAP_ALL: /* The insert key completely covers k, invalidate k */ if (!bkey_deleted(_k)) btree_keys_account_key_drop(&b->keys.nr, _k); bch_drop_subtract(iter, k, &stats); - k.k->p = bkey_start_pos(&insert->k->k); - if (!__extent_save(&b->keys, node_iter, _k, k.k)) { - /* - * Couldn't repack: we aren't necessarily able - * to repack if the new key is outside the range - * of the old extent, so we have to split - * @insert: - */ - k.k->p = orig_pos; - extent_save(&b->keys, node_iter, _k, k.k); - - if (extent_insert_advance_pos(trans, insert, - hook, &committed_pos, - k.s_c, res, flags, - &stats) == - BTREE_HOOK_RESTART_TRANS) { - ret = BTREE_INSERT_NEED_TRAVERSE; - goto stop; - } - extent_insert_committed(trans, insert, committed_pos, - res, flags, &stats); - /* - * We split and inserted upto at k.k->p - that - * has to coincide with iter->pos, so that we - * don't have anything more we have to insert - * until we recheck our journal reservation: - */ - EBUG_ON(bkey_cmp(committed_pos, k.k->p)); - } else { - bch_bset_fix_invalidated_key(&b->keys, _k); - bch_btree_node_iter_fix(iter, b, node_iter, _k, true); - } - + extent_save(&b->keys, node_iter, _k, k.k); break; - } + case BCH_EXTENT_OVERLAP_MIDDLE: { BKEY_PADDED(k) split; /* @@ -1952,113 +1917,17 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk, return BCH_MERGE_MERGE; } -static void extent_i_save(struct btree_keys *b, struct btree_node_iter *iter, +static bool extent_i_save(struct btree_keys *b, struct btree_node_iter *iter, struct bkey_packed *dst, struct bkey_i *src) { struct bkey_format *f = &b->format; BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k)); - extent_save(b, iter, dst, &src->k); - memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k)); -} - -static bool extent_merge_one_overlapping(struct btree_iter *iter, - struct bpos new_pos, - struct bkey_packed *k, struct bkey uk, - bool check, bool could_pack) -{ - struct btree *b = iter->nodes[0]; - struct btree_node_iter *node_iter = &iter->node_iters[0]; - - BUG_ON(!bkey_deleted(k)); - - if (check) { - return !bkey_packed(k) || could_pack; - } else { - uk.p = new_pos; - extent_save(&b->keys, node_iter, k, &uk); - bch_bset_fix_invalidated_key(&b->keys, k); - bch_btree_node_iter_fix(iter, b, node_iter, k, true); - return true; - } -} - -static bool extent_merge_do_overlapping(struct btree_iter *iter, - struct bkey *m, bool back_merge) -{ - struct btree_keys *b = &iter->nodes[0]->keys; - struct btree_node_iter *node_iter = &iter->node_iters[0]; - const struct bkey_format *f = &b->format; - struct bset_tree *t; - struct bkey_packed *k; - struct bkey uk; - struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m); - bool could_pack = bkey_pack_pos((void *) &uk, new_pos, f); - bool check = true; - - /* - * @m is the new merged extent: - * - * The merge took place in the last bset; we know there can't be any 0 - * size extents overlapping with m there because if so they would have - * been between the two extents we merged. - * - * But in the other bsets, we have to check for and fix such extents: - */ -do_fixup: - for (t = b->set; t < b->set + b->nsets; t++) { - if (!t->data->u64s) - continue; - - /* - * if we don't find this bset in the iterator we already got to - * the end of that bset, so start searching from the end. - */ - k = bch_btree_node_iter_bset_pos(node_iter, b, t->data) ?: - bkey_prev(t, bset_bkey_last(t->data)); - - if (back_merge) { - /* - * Back merge: 0 size extents will be before the key - * that was just inserted (and thus the iterator - * position) - walk backwards to find them - */ - for (; - k && - (uk = bkey_unpack_key(f, k), - bkey_cmp(uk.p, bkey_start_pos(m)) > 0); - k = bkey_prev(t, k)) { - if (bkey_cmp(uk.p, m->p) >= 0) - continue; - - if (!extent_merge_one_overlapping(iter, new_pos, - k, uk, check, could_pack)) - return false; - } - } else { - /* Front merge - walk forwards */ - for (; - k != bset_bkey_last(t->data) && - (uk = bkey_unpack_key(f, k), - bkey_cmp(uk.p, m->p) < 0); - k = bkey_next(k)) { - if (bkey_cmp(uk.p, - bkey_start_pos(m)) <= 0) - continue; - - if (!extent_merge_one_overlapping(iter, new_pos, - k, uk, check, could_pack)) - return false; - } - } - } - - if (check) { - check = false; - goto do_fixup; - } + if (!__extent_save(b, iter, dst, &src->k)) + return false; + memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k)); return true; } @@ -2082,7 +1951,6 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, BKEY_PADDED(k) li; BKEY_PADDED(k) ri; struct bkey_i *mi; - struct bkey tmp; /* * We need to save copies of both l and r, because we might get a @@ -2101,14 +1969,9 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, case BCH_MERGE_NOMERGE: return false; case BCH_MERGE_PARTIAL: - if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &mi->k, f)) - return false; - - if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge)) + if (!extent_i_save(b, node_iter, m, mi)) return false; - extent_i_save(b, node_iter, m, mi); - /* * Update iterator to reflect what we just inserted - otherwise, * the iter_fix() call is going to put us _before_ the key we @@ -2126,13 +1989,9 @@ static bool bch_extent_merge_inline(struct btree_iter *iter, bkey_copy(packed_to_bkey(r), &ri.k); return false; case BCH_MERGE_MERGE: - if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &li.k.k, f)) - return false; - - if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge)) + if (!extent_i_save(b, node_iter, m, &li.k)) return false; - extent_i_save(b, node_iter, m, &li.k); bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter, m, true); return true; |