summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2016-07-21 19:05:06 -0800
committerKent Overstreet <kent.overstreet@gmail.com>2016-07-30 01:13:53 -0800
commitea331bf0715c6c9d4d91e0e83127bd30aa5e5969 (patch)
tree4e0ff677e2f0ecce3daac21819054332494a9f1f
parent942a8988055dafee337a3d138e1d6190d36a4a63 (diff)
bcache: lift restrictions on 0 size extents
-rw-r--r--drivers/md/bcache/bset.c124
-rw-r--r--drivers/md/bcache/bset.h18
-rw-r--r--drivers/md/bcache/btree_io.c2
-rw-r--r--drivers/md/bcache/extents.c195
4 files changed, 68 insertions, 271 deletions
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index cae3b1f02f41..594f54380c9f 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -20,8 +20,10 @@
#include "alloc_types.h"
#include <trace/events/bcache.h>
-static inline bool btree_node_iter_set_cmp(struct btree_node_iter *,
- struct btree_keys *,
+static inline int btree_node_iter_cmp(struct btree_keys *,
+ const struct bkey_packed *,
+ const struct bkey_packed *);
+static inline bool btree_node_iter_set_cmp(struct btree_keys *,
struct btree_node_iter_set,
struct btree_node_iter_set);
@@ -53,20 +55,6 @@ struct bset_tree *bch_bkey_to_bset(struct btree_keys *b, struct bkey_packed *k)
* by the time we actually do the insert will all be deleted.
*/
-static bool keys_out_of_order(const struct bkey_format *f,
- const struct bkey_packed *prev,
- const struct bkey_packed *next,
- bool is_extents)
-{
- struct bkey nextu = bkey_unpack_key(f, next);
-
- return bkey_cmp_left_packed(f, prev, bkey_start_pos(&nextu)) > 0 ||
- ((is_extents
- ? !bkey_deleted(next)
- : !bkey_deleted(prev)) &&
- !bkey_cmp_packed(f, prev, next));
-}
-
#ifdef CONFIG_BCACHE_DEBUG
void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
@@ -155,7 +143,7 @@ static void bch_btree_node_iter_next_check(struct btree_node_iter *iter,
bkey_unpack_key(f, k);
if (n &&
- keys_out_of_order(f, k, n, iter->is_extents)) {
+ btree_node_iter_cmp(b, k, n) > 0) {
struct bkey ku = bkey_unpack_key(f, k);
struct bkey nu = bkey_unpack_key(f, n);
char buf1[80], buf2[80];
@@ -177,7 +165,7 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter,
btree_node_iter_for_each(iter, set) {
BUG_ON(set + 1 < iter->data + MAX_BSETS &&
set[1].k != set[1].end &&
- btree_node_iter_set_cmp(iter, b, set[0], set[1]));
+ btree_node_iter_set_cmp(b, set[0], set[1]));
k = __btree_node_offset_to_key(b, set->k);
t = bch_bkey_to_bset(b, k);
@@ -188,53 +176,37 @@ void bch_btree_node_iter_verify(struct btree_node_iter *iter,
}
void bch_verify_key_order(struct btree_keys *b,
- struct btree_node_iter *iter,
+ struct btree_node_iter *_iter,
struct bkey_packed *where)
{
const struct bkey_format *f = &b->format;
+ struct btree_node_iter iter = *_iter;
struct bset_tree *t = bch_bkey_to_bset(b, where);
- struct bkey_packed *k, *prev;
+ struct bkey_packed *k;
struct bkey uk, uw = bkey_unpack_key(f, where);
k = bkey_prev(t, where);
BUG_ON(k &&
- keys_out_of_order(f, k, where, b->ops->is_extents));
+ btree_node_iter_cmp(b, k, where) > 0);
k = bkey_next(where);
BUG_ON(k != bset_bkey_last(t->data) &&
- keys_out_of_order(f, where, k, b->ops->is_extents));
-
- for (t = b->set; t <= b->set + b->nsets; t++) {
- if (!t->data->u64s)
- continue;
-
- if (where >= t->data->start &&
- where < bset_bkey_last(t->data))
- continue;
-
- k = bch_btree_node_iter_bset_pos(iter, b, t->data) ?:
- bkey_prev(t, bset_bkey_last(t->data));
+ btree_node_iter_cmp(b, where, k) > 0);
- while (bkey_cmp_left_packed(f, k, bkey_start_pos(&uw)) > 0 &&
- (prev = bkey_prev(t, k)))
- k = prev;
+ if (b->ops->is_extents) {
+ do {
+ k = bch_btree_node_iter_prev_all(&iter, b);
+ } while (k && bkey_deleted(k));
- for (;
- k != bset_bkey_last(t->data);
- k = bkey_next(k)) {
- uk = bkey_unpack_key(f, k);
+ BUG_ON(k && (uk = bkey_unpack_key(f, k),
+ bkey_cmp(uk.p, bkey_start_pos(&uw)) > 0));
- if (b->ops->is_extents) {
- BUG_ON(!(bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0 ||
- bkey_cmp(uk.p, bkey_start_pos(&uw)) <= 0));
- } else {
- BUG_ON(!bkey_cmp(uw.p, uk.p) &&
- !bkey_deleted(&uk));
- }
+ iter = *_iter;
+ bch_btree_node_iter_advance(&iter, b);
+ k = bch_btree_node_iter_peek(&iter, b);
- if (bkey_cmp(uw.p, bkey_start_pos(&uk)) <= 0)
- break;
- }
+ BUG_ON(k && (uk = bkey_unpack_key(f, k),
+ bkey_cmp(uw.p, bkey_start_pos(&uk)) > 0));
}
}
@@ -1230,44 +1202,28 @@ static struct bkey_packed *bch_bset_search(struct btree_keys *b,
/* Btree node iterator */
-static inline bool btree_node_iter_cmp(struct btree_keys *b,
- bool is_extents,
- struct bkey_packed *l,
- struct bkey_packed *r)
+static inline int btree_node_iter_cmp(struct btree_keys *b,
+ const struct bkey_packed *l,
+ const struct bkey_packed *r)
{
- s64 c = bkey_cmp_packed(&b->format, l, r);
-
- /*
- * For non extents, when keys compare equal the deleted keys have to
- * come first - so that bch_btree_node_iter_next_check() can detect
- * duplicate nondeleted keys (and possibly other reasons?)
- *
- * For extents, bkey_deleted() is used as a proxy for k->size == 0, so
- * deleted keys have to sort last.
- */
- return c ? c > 0
- : is_extents
- ? bkey_deleted(l) > bkey_deleted(r)
- : bkey_deleted(l) < bkey_deleted(r);
+ /* When keys compare equal the deleted keys come first: */
+ return bkey_cmp_packed(&b->format, l, r) ?:
+ (int) bkey_deleted(r) - (int) bkey_deleted(l);
}
/*
* Returns true if l > r:
*/
-static inline bool btree_node_iter_set_cmp(struct btree_node_iter *iter,
- struct btree_keys *b,
+static inline bool btree_node_iter_set_cmp(struct btree_keys *b,
struct btree_node_iter_set l,
struct btree_node_iter_set r)
{
- return btree_node_iter_cmp(b, iter->is_extents,
+ return btree_node_iter_cmp(b,
__btree_node_offset_to_key(b, l.k),
- __btree_node_offset_to_key(b, r.k));
+ __btree_node_offset_to_key(b, r.k)) > 0;
}
-#define btree_node_iter_cmp_heap(_l, _r) \
- (btree_node_iter_cmp(b, (iter)->is_extents, \
- __btree_node_offset_to_key(b, (_l).k), \
- __btree_node_offset_to_key(b, (_r).k)))
+#define btree_node_iter_cmp_heap(_l, _r) btree_node_iter_set_cmp(b, _l, _r)
void bch_btree_node_iter_push(struct btree_node_iter *iter,
struct btree_keys *b,
@@ -1285,7 +1241,7 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter,
});
btree_node_iter_for_each(iter, pos)
- if (!btree_node_iter_set_cmp(iter, b, n, *pos))
+ if (!btree_node_iter_set_cmp(b, n, *pos))
break;
EBUG_ON(pos >= iter->data + MAX_BSETS);
@@ -1320,8 +1276,8 @@ void bch_btree_node_iter_push(struct btree_node_iter *iter,
* This works out ok, but it's something to be aware of:
*
* - For non extents, we guarantee that the live key comes last - see
- * btree_node_iter_cmp(), keys_out_of_order(). So the duplicates you don't
- * see will only be deleted keys you don't care about.
+ * btree_node_iter_cmp(). So the duplicates you don't see will only be
+ * deleted keys you don't care about.
*
* - For extents, deleted keys sort last (see the comment at the top of this
* file). But when you're searching for extents, you actually want the first
@@ -1364,7 +1320,7 @@ void bch_btree_node_iter_init(struct btree_node_iter *iter,
BUG();
}
- __bch_btree_node_iter_init(iter, b);
+ memset(iter, 0, sizeof(*iter));
for (t = b->set; t <= b->set + b->nsets; t++)
bch_btree_node_iter_push(iter, b,
@@ -1381,7 +1337,7 @@ void bch_btree_node_iter_init_from_start(struct btree_node_iter *iter,
{
struct bset_tree *t;
- __bch_btree_node_iter_init(iter, b);
+ memset(iter, 0, sizeof(*iter));
for (t = b->set; t <= b->set + b->nsets; t++)
bch_btree_node_iter_push(iter, b,
@@ -1413,7 +1369,7 @@ static inline void btree_node_iter_sift(struct btree_node_iter *iter,
for (i = start;
i + 1 < MAX_BSETS &&
iter->data[i + 1].k != iter->data[i + 1].end &&
- btree_node_iter_set_cmp(iter, b, iter->data[i], iter->data[i + 1]);
+ btree_node_iter_set_cmp(b, iter->data[i], iter->data[i + 1]);
i++)
swap(iter->data[i], iter->data[i + 1]);
}
@@ -1484,7 +1440,7 @@ found:
if (k &&
(!prev ||
- btree_node_iter_cmp(b, iter->is_extents, k, prev))) {
+ btree_node_iter_cmp(b, prev, k) < 0)) {
prev = k;
prev_set = i;
prev_t = t;
@@ -1716,7 +1672,7 @@ struct btree_nr_keys bch_sort_bsets(struct bset *dst, struct btree_keys *b,
struct bset_tree *t;
iter = &_iter;
- __bch_btree_node_iter_large_init(iter, b);
+ iter->used = 0;
for (t = b->set + from; t <= b->set + b->nsets; t++)
bch_btree_node_iter_large_push(iter, b,
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index df13473506ab..1dd1fc12b51a 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -442,20 +442,11 @@ static inline enum bch_extent_overlap bch_extent_overlap(const struct bkey *k,
/* Btree key iteration */
struct btree_node_iter {
- u8 is_extents;
-
struct btree_node_iter_set {
u16 k, end;
} data[MAX_BSETS];
};
-static inline void __bch_btree_node_iter_init(struct btree_node_iter *iter,
- struct btree_keys *b)
-{
- iter->is_extents = b->ops->is_extents;
- memset(iter->data, 0, sizeof(iter->data));
-}
-
void bch_btree_node_iter_push(struct btree_node_iter *, struct btree_keys *,
const struct bkey_packed *,
const struct bkey_packed *);
@@ -559,20 +550,11 @@ struct bkey_s_c bch_btree_node_iter_peek_unpack(struct btree_node_iter *,
/* Sorting */
struct btree_node_iter_large {
- u8 is_extents;
u16 used;
struct btree_node_iter_set data[MAX_BSETS];
};
-static inline void
-__bch_btree_node_iter_large_init(struct btree_node_iter_large *iter,
- struct btree_keys *b)
-{
- iter->used = 0;
- iter->is_extents = b->ops->is_extents;
-}
-
void bch_btree_node_iter_large_advance(struct btree_node_iter_large *,
struct btree_keys *);
diff --git a/drivers/md/bcache/btree_io.c b/drivers/md/bcache/btree_io.c
index 2e362003a44d..f90fc417aed1 100644
--- a/drivers/md/bcache/btree_io.c
+++ b/drivers/md/bcache/btree_io.c
@@ -267,7 +267,7 @@ void bch_btree_node_read_done(struct cache_set *c, struct btree *b,
int ret;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
- __bch_btree_node_iter_large_init(iter, &b->keys);
+ iter->used = 0;
err = "dynamic fault";
if (bch_meta_read_fault("btree"))
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 10ce62fb673b..951850c0eb27 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -1254,6 +1254,11 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
+ if (!k.k->size) {
+ bch_btree_node_iter_advance(node_iter, &b->keys);
+ continue;
+ }
+
if (bkey_cmp(bkey_start_pos(k.k), insert->k->k.p) >= 0)
break;
@@ -1274,27 +1279,21 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
}
}
- /*
- * Only call advance pos & call hook for nonzero size extents:
- * If hook returned BTREE_HOOK_NO_INSERT, @insert->k no longer
- * overlaps with @k:
- */
- if (k.k->size)
- switch (extent_insert_advance_pos(trans, insert, hook,
- &committed_pos,
- k.s_c, res, flags,
- &stats)) {
- case BTREE_HOOK_DO_INSERT:
- break;
- case BTREE_HOOK_NO_INSERT:
- continue;
- case BTREE_HOOK_RESTART_TRANS:
- ret = BTREE_INSERT_NEED_TRAVERSE;
- goto stop;
- }
+ switch (extent_insert_advance_pos(trans, insert, hook,
+ &committed_pos,
+ k.s_c, res, flags,
+ &stats)) {
+ case BTREE_HOOK_DO_INSERT:
+ break;
+ case BTREE_HOOK_NO_INSERT:
+ continue;
+ case BTREE_HOOK_RESTART_TRANS:
+ ret = BTREE_INSERT_NEED_TRAVERSE;
+ goto stop;
+ }
/* k is the key currently in the tree, 'insert' is the new key */
- switch (bch_extent_overlap(&insert->k->k, k.k)) {
+ switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
bch_cut_subtract_front(iter, insert->k->k.p, k, &stats);
@@ -1319,49 +1318,15 @@ bch_insert_fixup_extent(struct btree_insert_trans *trans,
bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
break;
- case BCH_EXTENT_OVERLAP_ALL: {
- struct bpos orig_pos = k.k->p;
-
+ case BCH_EXTENT_OVERLAP_ALL:
/* The insert key completely covers k, invalidate k */
if (!bkey_deleted(_k))
btree_keys_account_key_drop(&b->keys.nr, _k);
bch_drop_subtract(iter, k, &stats);
- k.k->p = bkey_start_pos(&insert->k->k);
- if (!__extent_save(&b->keys, node_iter, _k, k.k)) {
- /*
- * Couldn't repack: we aren't necessarily able
- * to repack if the new key is outside the range
- * of the old extent, so we have to split
- * @insert:
- */
- k.k->p = orig_pos;
- extent_save(&b->keys, node_iter, _k, k.k);
-
- if (extent_insert_advance_pos(trans, insert,
- hook, &committed_pos,
- k.s_c, res, flags,
- &stats) ==
- BTREE_HOOK_RESTART_TRANS) {
- ret = BTREE_INSERT_NEED_TRAVERSE;
- goto stop;
- }
- extent_insert_committed(trans, insert, committed_pos,
- res, flags, &stats);
- /*
- * We split and inserted upto at k.k->p - that
- * has to coincide with iter->pos, so that we
- * don't have anything more we have to insert
- * until we recheck our journal reservation:
- */
- EBUG_ON(bkey_cmp(committed_pos, k.k->p));
- } else {
- bch_bset_fix_invalidated_key(&b->keys, _k);
- bch_btree_node_iter_fix(iter, b, node_iter, _k, true);
- }
-
+ extent_save(&b->keys, node_iter, _k, k.k);
break;
- }
+
case BCH_EXTENT_OVERLAP_MIDDLE: {
BKEY_PADDED(k) split;
/*
@@ -1952,113 +1917,17 @@ static enum merge_result bch_extent_merge(struct btree_keys *bk,
return BCH_MERGE_MERGE;
}
-static void extent_i_save(struct btree_keys *b, struct btree_node_iter *iter,
+static bool extent_i_save(struct btree_keys *b, struct btree_node_iter *iter,
struct bkey_packed *dst, struct bkey_i *src)
{
struct bkey_format *f = &b->format;
BUG_ON(bkeyp_val_u64s(f, dst) != bkey_val_u64s(&src->k));
- extent_save(b, iter, dst, &src->k);
- memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k));
-}
-
-static bool extent_merge_one_overlapping(struct btree_iter *iter,
- struct bpos new_pos,
- struct bkey_packed *k, struct bkey uk,
- bool check, bool could_pack)
-{
- struct btree *b = iter->nodes[0];
- struct btree_node_iter *node_iter = &iter->node_iters[0];
-
- BUG_ON(!bkey_deleted(k));
-
- if (check) {
- return !bkey_packed(k) || could_pack;
- } else {
- uk.p = new_pos;
- extent_save(&b->keys, node_iter, k, &uk);
- bch_bset_fix_invalidated_key(&b->keys, k);
- bch_btree_node_iter_fix(iter, b, node_iter, k, true);
- return true;
- }
-}
-
-static bool extent_merge_do_overlapping(struct btree_iter *iter,
- struct bkey *m, bool back_merge)
-{
- struct btree_keys *b = &iter->nodes[0]->keys;
- struct btree_node_iter *node_iter = &iter->node_iters[0];
- const struct bkey_format *f = &b->format;
- struct bset_tree *t;
- struct bkey_packed *k;
- struct bkey uk;
- struct bpos new_pos = back_merge ? m->p : bkey_start_pos(m);
- bool could_pack = bkey_pack_pos((void *) &uk, new_pos, f);
- bool check = true;
-
- /*
- * @m is the new merged extent:
- *
- * The merge took place in the last bset; we know there can't be any 0
- * size extents overlapping with m there because if so they would have
- * been between the two extents we merged.
- *
- * But in the other bsets, we have to check for and fix such extents:
- */
-do_fixup:
- for (t = b->set; t < b->set + b->nsets; t++) {
- if (!t->data->u64s)
- continue;
-
- /*
- * if we don't find this bset in the iterator we already got to
- * the end of that bset, so start searching from the end.
- */
- k = bch_btree_node_iter_bset_pos(node_iter, b, t->data) ?:
- bkey_prev(t, bset_bkey_last(t->data));
-
- if (back_merge) {
- /*
- * Back merge: 0 size extents will be before the key
- * that was just inserted (and thus the iterator
- * position) - walk backwards to find them
- */
- for (;
- k &&
- (uk = bkey_unpack_key(f, k),
- bkey_cmp(uk.p, bkey_start_pos(m)) > 0);
- k = bkey_prev(t, k)) {
- if (bkey_cmp(uk.p, m->p) >= 0)
- continue;
-
- if (!extent_merge_one_overlapping(iter, new_pos,
- k, uk, check, could_pack))
- return false;
- }
- } else {
- /* Front merge - walk forwards */
- for (;
- k != bset_bkey_last(t->data) &&
- (uk = bkey_unpack_key(f, k),
- bkey_cmp(uk.p, m->p) < 0);
- k = bkey_next(k)) {
- if (bkey_cmp(uk.p,
- bkey_start_pos(m)) <= 0)
- continue;
-
- if (!extent_merge_one_overlapping(iter, new_pos,
- k, uk, check, could_pack))
- return false;
- }
- }
- }
-
- if (check) {
- check = false;
- goto do_fixup;
- }
+ if (!__extent_save(b, iter, dst, &src->k))
+ return false;
+ memcpy(bkeyp_val(f, dst), &src->v, bkey_val_bytes(&src->k));
return true;
}
@@ -2082,7 +1951,6 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
BKEY_PADDED(k) li;
BKEY_PADDED(k) ri;
struct bkey_i *mi;
- struct bkey tmp;
/*
* We need to save copies of both l and r, because we might get a
@@ -2101,14 +1969,9 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
case BCH_MERGE_NOMERGE:
return false;
case BCH_MERGE_PARTIAL:
- if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &mi->k, f))
- return false;
-
- if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
+ if (!extent_i_save(b, node_iter, m, mi))
return false;
- extent_i_save(b, node_iter, m, mi);
-
/*
* Update iterator to reflect what we just inserted - otherwise,
* the iter_fix() call is going to put us _before_ the key we
@@ -2126,13 +1989,9 @@ static bool bch_extent_merge_inline(struct btree_iter *iter,
bkey_copy(packed_to_bkey(r), &ri.k);
return false;
case BCH_MERGE_MERGE:
- if (bkey_packed(m) && !bkey_pack_key((void *) &tmp, &li.k.k, f))
- return false;
-
- if (!extent_merge_do_overlapping(iter, &li.k.k, back_merge))
+ if (!extent_i_save(b, node_iter, m, &li.k))
return false;
- extent_i_save(b, node_iter, m, &li.k);
bch_btree_node_iter_fix(iter, iter->nodes[0], node_iter,
m, true);
return true;