author     Kent Overstreet <kmo@daterainc.com>    2015-02-09 17:54:33 -0800
committer  Kent Overstreet <kmo@daterainc.com>    2015-02-12 23:44:11 -0800
commit     b354cf6ccab5f9934e88b8c2f91b47e2152f84b2 (patch)
tree       4df6c8d3217413bd5aee16d8dc1284e191fc1e22
parent     b9c284fce93e07f97d415d6c1aec8e4bf6b65f94 (diff)
bcache: Hook up new btree node fields
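Start using the new on-disk btree node fields: min_key, max_key, the
node's bkey format and the per-bset btree level are initialized when
nodes are allocated, verified when nodes are read, and kept up to date
across node splits and GC coalescing. bkey_debugcheck() now checks keys
against the node's own min_key/max_key, and GC verifies that each
leaf's min_key is the successor of the previous leaf's max_key. The
offset/size hacks in bch_bkey_format_done() are dropped; instead,
__bch_btree_calc_format() adds both node boundary keys to the format
state. __bch_btree_iter_advance_pos() moves from btree.c to btree.h so
gc.c can use it.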
Change-Id: Id688153f065d685af18a8e2515edbb5991c86559
-rw-r--r--  drivers/md/bcache/bkey.c           13
-rw-r--r--  drivers/md/bcache/bkey_methods.c    7
-rw-r--r--  drivers/md/bcache/btree.c          83
-rw-r--r--  drivers/md/bcache/btree.h          13
-rw-r--r--  drivers/md/bcache/debug.c           8
-rw-r--r--  drivers/md/bcache/gc.c             76
6 files changed, 121 insertions, 79 deletions
diff --git a/drivers/md/bcache/bkey.c b/drivers/md/bcache/bkey.c
index 276d5b5ed2a1..ab3a034f407a 100644
--- a/drivers/md/bcache/bkey.c
+++ b/drivers/md/bcache/bkey.c
@@ -283,19 +283,6 @@ struct bkey_format bch_bkey_format_done(struct bkey_format_state *s)
.nr_fields = BKEY_NR_FIELDS,
};
- /*
- * Hack to make sure we can always repack keys in
- * bch_insert_fixup_extent(), when trimming makes the offset smaller
- */
- s->field_min[BKEY_FIELD_OFFSET] =
- max_t(s64, 0, s->field_min[BKEY_FIELD_OFFSET] - UINT_MAX);
-
- /*
- * Make sure we can always store a size of 0, also because of
- * bch_insert_fixup_extent
- */
- s->field_min[BKEY_FIELD_SIZE] = 0;
-
for (i = 0; i < ARRAY_SIZE(s->field_min); i++) {
ret.field_offset[i] = min(s->field_min[i], s->field_max[i]);
ret.bits_per_field[i] = fls(s->field_max[i] -
diff --git a/drivers/md/bcache/bkey_methods.c b/drivers/md/bcache/bkey_methods.c
index 2b37fe623dd7..44da385c9b09 100644
--- a/drivers/md/bcache/bkey_methods.c
+++ b/drivers/md/bcache/bkey_methods.c
@@ -54,7 +54,12 @@ void bkey_debugcheck(struct btree *b, struct bkey_s_c k)
BUG_ON(!k.k->u64s);
- cache_set_bug_on(bkey_cmp(k.k->p, b->key.k.p) > 0,
+ cache_set_bug_on(bkey_cmp(bkey_start_pos(k.k),
+ b->data->min_key) < 0,
+ b->c, "key before start of btree node");
+
+ cache_set_bug_on(bkey_cmp(k.k->p,
+ b->data->max_key) > 0,
b->c, "key past end of btree node");
if (bkey_invalid(b->c, type, k)) {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index a79beb263ee0..c4f9851b8b78 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -340,8 +340,16 @@ void bch_btree_node_read_done(struct btree *b, struct cache *ca,
if (!b->data->keys.seq)
goto err;
+ err = "incorrect max key";
+ if (bkey_cmp(b->data->max_key, b->key.k.p))
+ goto err;
+
+ err = "incorrect level";
+ if (BSET_BTREE_LEVEL(i) != b->level)
+ goto err;
+
b->keys.format = b->data->format;
- i = b->keys.set->data = &b->data->keys;
+ b->keys.set->data = &b->data->keys;
while (b->written < btree_blocks(c)) {
unsigned blocks;
@@ -1467,10 +1475,12 @@ static struct btree *bch_btree_node_alloc(struct cache_set *c, int level,
bch_check_mark_super(c, &b->key, true);
b->accessed = 1;
- b->data->magic = bset_magic(&c->sb);
- bch_bset_init_first(&b->keys, &b->data->keys);
set_btree_node_dirty(b);
+ bch_bset_init_first(&b->keys, &b->data->keys);
+ b->data->magic = bset_magic(&c->sb);
+ SET_BSET_BTREE_LEVEL(&b->data->keys, level);
+
trace_bcache_btree_node_alloc(b);
return b;
}
@@ -1484,7 +1494,11 @@ struct btree *__btree_node_alloc_replacement(struct btree *b,
bch_verify_btree_keys_accounting(&b->keys);
n = bch_btree_node_alloc(b->c, b->level, b->btree_id, reserve);
- n->keys.format = format;
+
+ n->data->min_key = b->data->min_key;
+ n->data->max_key = b->data->max_key;
+ n->data->format = format;
+ n->keys.format = format;
bch_btree_sort_into(&n->keys, &b->keys,
b->keys.ops->key_normalize,
@@ -1511,19 +1525,19 @@ void __bch_btree_calc_format(struct bkey_format_state *s, struct btree *b)
 * place, and successfully repack, when inserting an overlapping
 * extent:
*/
- bch_bkey_format_add_pos(s, b->key.k.p);
-
- s->field_min[1] = max_t(s64, 0, s->field_min[1] - UINT_MAX);
+ bch_bkey_format_add_pos(s, b->data->min_key);
+ bch_bkey_format_add_pos(s, b->data->max_key);
/*
* If we span multiple inodes, need to be able to store an
* offset of 0:
*/
- if (s->field_min[0] != s->field_max[0])
- s->field_min[1] = 0;
+ if (s->field_min[BKEY_FIELD_INODE] !=
+ s->field_max[BKEY_FIELD_INODE])
+ s->field_min[BKEY_FIELD_OFFSET] = 0;
/* Make sure we can store a size of 0: */
- s->field_min[3] = 0;
+ s->field_min[BKEY_FIELD_SIZE] = 0;
}
}
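With both boundary keys added to the format state, the computed field
ranges cover every key the node may legally hold. A sketch with
hypothetical bounds:

	/*
	 * Example with hypothetical bounds: a leaf covering 5:1000 .. 5:9000.
	 * After adding min_key and max_key to the format state:
	 *
	 *	field_min[BKEY_FIELD_INODE]  = 5,    field_max[BKEY_FIELD_INODE]  = 5
	 *	field_min[BKEY_FIELD_OFFSET] = 1000, field_max[BKEY_FIELD_OFFSET] = 9000
	 *	field_min[BKEY_FIELD_SIZE]   = 0     (forced above)
	 *
	 * Any key that can legally live in the node - including an extent whose
	 * offset or size shrank while being trimmed - stays inside these field
	 * ranges and can therefore be repacked in place.
	 */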
@@ -1601,6 +1615,22 @@ int btree_check_reserve(struct btree *b, struct btree_iter *iter,
iter ? &iter->cl : NULL);
}
+static struct btree *__btree_root_alloc(struct cache_set *c, unsigned level,
+ enum btree_id id,
+ enum alloc_reserve reserve)
+{
+ struct btree *b = bch_btree_node_alloc(c, level, id, reserve);
+
+ b->data->min_key = POS_MIN;
+ b->data->max_key = POS_MAX;
+ b->data->format = bch_btree_calc_format(b);
+ b->key.k.p = POS_MAX;
+
+ six_unlock_write(&b->lock);
+
+ return b;
+}
+
int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
struct closure *writes)
{
@@ -1612,10 +1642,7 @@ int bch_btree_root_alloc(struct cache_set *c, enum btree_id id,
while (__btree_check_reserve(c, id, 1, &cl))
closure_sync(&cl);
- b = bch_btree_node_alloc(c, 0, id, id);
-
- b->key.k.p = POS_MAX;
- six_unlock_write(&b->lock);
+ b = __btree_root_alloc(c, 0, id, id);
bch_btree_node_write(b, writes, NULL);
@@ -2045,16 +2072,13 @@ static int btree_split(struct btree *b,
n2 = bch_btree_node_alloc(iter->c, b->level,
iter->btree_id, reserve);
+ n2->data->max_key = n1->data->max_key;
n2->keys.format = n1->keys.format;
set2 = btree_bset_first(n2);
- if (!parent) {
- n3 = bch_btree_node_alloc(iter->c, b->level + 1,
- iter->btree_id, reserve);
-
- n3->key.k.p = POS_MAX;
- six_unlock_write(&n3->lock);
- }
+ if (!parent)
+ n3 = __btree_root_alloc(iter->c, b->level + 1,
+ iter->btree_id, reserve);
/*
* Has to be a linear search because we don't have an auxiliary
@@ -2074,6 +2098,10 @@ static int btree_split(struct btree *b,
n1->key.k.p = bkey_unpack_key(&n1->keys.format, k).p;
k = bkey_next(k);
+ n1->data->max_key = n1->key.k.p;
+ n2->data->min_key =
+ __bch_btree_iter_advance_pos(iter, n1->key.k.p);
+
set2->u64s = (u64 *) bset_bkey_last(set1) - (u64 *) k;
set1->u64s -= set2->u64s;
@@ -2625,19 +2653,6 @@ void bch_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
iter->pos = new_pos;
}
-static struct bpos __bch_btree_iter_advance_pos(struct btree_iter *iter,
- struct bpos pos)
-{
- if (iter->btree_id == BTREE_ID_INODES) {
- pos.inode++;
- pos.offset = 0;
- } else if (iter->btree_id != BTREE_ID_EXTENTS) {
- pos = bkey_successor(pos);
- }
-
- return pos;
-}
-
void bch_btree_iter_advance_pos(struct btree_iter *iter)
{
bch_btree_iter_set_pos(iter,
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 608d947de373..e08d68650d7d 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -289,6 +289,19 @@ void bch_btree_iter_set_pos(struct btree_iter *, struct bpos);
void bch_btree_iter_advance_pos(struct btree_iter *);
bool bch_btree_iter_upgrade(struct btree_iter *);
+static inline struct bpos __bch_btree_iter_advance_pos(struct btree_iter *iter,
+ struct bpos pos)
+{
+ if (iter->btree_id == BTREE_ID_INODES) {
+ pos.inode++;
+ pos.offset = 0;
+ } else if (iter->btree_id != BTREE_ID_EXTENTS) {
+ pos = bkey_successor(pos);
+ }
+
+ return pos;
+}
+
static inline void __btree_iter_node_set(struct btree_iter *iter,
struct btree *b,
struct bpos pos)
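The helper's behaviour per btree type, on a hypothetical position 5:123
(assuming bkey_successor() steps the offset by one, carrying into the
inode on overflow):

	/*
	 * __bch_btree_iter_advance_pos() on a hypothetical pos of 5:123:
	 *
	 *	BTREE_ID_INODES:  5:123 -> 6:0   (next inode, offset reset)
	 *	BTREE_ID_EXTENTS: 5:123 -> 5:123 (unchanged; extent keys are
	 *					  indexed by end position)
	 *	other btrees:	  5:123 -> 5:124 (bkey_successor(): next position)
	 */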
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 5b2919e08888..4bb4ed1d1f50 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -302,13 +302,15 @@ static ssize_t bch_read_btree_formats(struct file *file, char __user *buf,
bch_btree_keys_stats(&b->keys, &stats);
i->bytes = scnprintf(i->buf, sizeof(i->buf),
- "l %u %llu:%llu:\n"
+ "l %u %llu:%llu - %llu:%llu:\n"
"\tformat: u64s %u fields %u %u %u %u %u\n"
"\tpacked %u unpacked %u u64s %u\n"
"\tfloats %zu failed %zu\n",
b->level,
- b->key.k.p.inode,
- b->key.k.p.offset,
+ b->data->min_key.inode,
+ b->data->min_key.offset,
+ b->data->max_key.inode,
+ b->data->max_key.offset,
f->key_u64s,
f->bits_per_field[0],
f->bits_per_field[1],
diff --git a/drivers/md/bcache/gc.c b/drivers/md/bcache/gc.c
index 2f52d0b5ef78..994849430e21 100644
--- a/drivers/md/bcache/gc.c
+++ b/drivers/md/bcache/gc.c
@@ -230,33 +230,36 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
* up at different boundaries.
*/
for (i = nr_new_nodes - 1; i > 0; --i) {
- struct bset *n1 = btree_bset_first(new_nodes[i]);
- struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
+ struct btree *n1 = new_nodes[i];
+ struct btree *n2 = new_nodes[i - 1];
+
+ struct bset *s1 = btree_bset_first(n1);
+ struct bset *s2 = btree_bset_first(n2);
struct bkey_packed *k, *last = NULL;
u64s = 0;
- for (k = n2->start;
- k < bset_bkey_last(n2) &&
- __set_blocks(n1, n1->u64s + u64s + k->u64s,
+ for (k = s2->start;
+ k < bset_bkey_last(s2) &&
+ __set_blocks(s1, s1->u64s + u64s + k->u64s,
block_bytes(c)) <= blocks;
k = bkey_next(k)) {
last = k;
u64s += k->u64s;
}
- if (u64s == n2->u64s) {
+ if (u64s == s2->u64s) {
/* n2 fits entirely in n1 */
- new_nodes[i]->key.k.p = new_nodes[i - 1]->key.k.p;
+ n1->key.k.p = n1->data->max_key = n2->data->max_key;
- memcpy(bset_bkey_last(n1),
- n2->start,
- n2->u64s * sizeof(u64));
- n1->u64s += n2->u64s;
+ memcpy(bset_bkey_last(s1),
+ s2->start,
+ s2->u64s * sizeof(u64));
+ s1->u64s += s2->u64s;
- six_unlock_write(&new_nodes[i - 1]->lock);
- btree_node_free(new_nodes[i - 1]);
- six_unlock_intent(&new_nodes[i - 1]->lock);
+ six_unlock_write(&n2->lock);
+ btree_node_free(n2);
+ six_unlock_intent(&n2->lock);
memmove(new_nodes + i - 1,
new_nodes + i,
@@ -264,28 +267,32 @@ static void bch_coalesce_nodes(struct btree *old_nodes[GC_MERGE_NODES],
new_nodes[--nr_new_nodes] = NULL;
} else if (u64s) {
/* move part of n2 into n1 */
- new_nodes[i]->key.k.p =
- bkey_unpack_key(&new_format, last).p;
+ n1->key.k.p = n1->data->max_key =
+ bkey_unpack_key(&n1->keys.format, last).p;
+
+ n2->data->min_key =
+ __bch_btree_iter_advance_pos(iter,
+ n1->data->max_key);
- memcpy(bset_bkey_last(n1),
- n2->start,
+ memcpy(bset_bkey_last(s1),
+ s2->start,
u64s * sizeof(u64));
- n1->u64s += u64s;
+ s1->u64s += u64s;
- memmove(n2->start,
- bset_bkey_idx(n2, u64s),
- (n2->u64s - u64s) * sizeof(u64));
- n2->u64s -= u64s;
+ memmove(s2->start,
+ bset_bkey_idx(s2, u64s),
+ (s2->u64s - u64s) * sizeof(u64));
+ s2->u64s -= u64s;
}
}
for (i = 0; i < nr_new_nodes; i++) {
- new_nodes[i]->keys.nr_live_u64s =
- new_nodes[i]->keys.set[0].data->u64s;
- recalc_packed_keys(new_nodes[i]);
+ struct btree *n = new_nodes[i];
- six_unlock_write(&new_nodes[i]->lock);
- bch_btree_node_write(new_nodes[i], &cl, NULL);
+ n->keys.nr_live_u64s = n->keys.set->data->u64s;
+ recalc_packed_keys(n);
+ six_unlock_write(&n->lock);
+ bch_btree_node_write(n, &cl, NULL);
}
/* Wait for all the writes to finish */
@@ -374,6 +381,7 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id,
struct btree_iter iter;
struct btree *b;
bool should_rewrite;
+ struct bpos next_min = POS_MIN;
bch_btree_iter_init(&iter, c, btree_id, POS_MIN);
iter.is_extents = false;
@@ -382,6 +390,18 @@ static int bch_gc_btree(struct cache_set *c, enum btree_id btree_id,
for (b = bch_btree_iter_peek_node(&iter);
b;
b = bch_btree_iter_next_node(&iter)) {
+ if (!b->level) {
+ cache_set_bug_on(bkey_cmp(b->data->min_key, next_min), c,
+ "btree node has incorrect min key: %llu:%llu != %llu:%llu",
+ b->data->min_key.inode,
+ b->data->min_key.offset,
+ next_min.inode,
+ next_min.offset);
+
+ if (bkey_cmp(b->data->max_key, POS_MAX))
+ next_min = __bch_btree_iter_advance_pos(&iter, b->data->max_key);
+ }
+
bch_verify_btree_keys_accounting(&b->keys);
should_rewrite = btree_gc_mark_node(c, b, stat);
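Together with the max_key updates during splits and coalescing, the new
check amounts to the following invariant over leaf nodes (a pseudocode
sketch, not actual kernel code):

	/*
	 * Invariant enforced by the new GC check (pseudocode):
	 *
	 *	next_min = POS_MIN;
	 *	for each leaf b, in key order:
	 *		assert(!bkey_cmp(b->data->min_key, next_min));
	 *		if (bkey_cmp(b->data->max_key, POS_MAX))
	 *			next_min = advance(b->data->max_key);
	 *
	 * i.e. the leaves' [min_key, max_key] ranges tile the keyspace from
	 * POS_MIN to POS_MAX with no gaps and no overlaps.
	 */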