summary refs log tree commit diff
diff options
context:
space:
mode:
author Kent Overstreet <kent.overstreet@gmail.com> 2021-01-07 19:49:15 -0500
committer Kent Overstreet <kent.overstreet@gmail.com> 2021-01-08 21:33:27 -0500
commit f39f0bde7875aceb3e82a330f3a86223c6dd8af4 (patch)
tree 06f46528e9c5ac2f472699c8b857fee11b37b0f1
parent 41dc1733f12fab96df2089d7c62036a880891010 (diff)
Update bcachefs sources to fcf8a0889c bcachefs: bch2_alloc_write() should be writing for all devices
-rw-r--r-- .bcachefs_revision 2
-rw-r--r-- cmd_migrate.c 2
-rw-r--r-- libbcachefs/alloc_background.c 12
-rw-r--r-- libbcachefs/alloc_foreground.c 60
-rw-r--r-- libbcachefs/alloc_types.h 13
-rw-r--r-- libbcachefs/bcachefs.h 2
-rw-r--r-- libbcachefs/bcachefs_format.h 2
-rw-r--r-- libbcachefs/bkey_buf.h 60
-rw-r--r-- libbcachefs/bkey_on_stack.h 43
-rw-r--r-- libbcachefs/bkey_sort.c 18
-rw-r--r-- libbcachefs/btree_cache.c 13
-rw-r--r-- libbcachefs/btree_gc.c 31
-rw-r--r-- libbcachefs/btree_io.c 23
-rw-r--r-- libbcachefs/btree_iter.c 44
-rw-r--r-- libbcachefs/btree_key_cache.c 2
-rw-r--r-- libbcachefs/btree_types.h 2
-rw-r--r-- libbcachefs/btree_update.h 2
-rw-r--r-- libbcachefs/btree_update_interior.c 30
-rw-r--r-- libbcachefs/btree_update_leaf.c 11
-rw-r--r-- libbcachefs/buckets.c 3
-rw-r--r-- libbcachefs/compress.c 13
-rw-r--r-- libbcachefs/ec.c 242
-rw-r--r-- libbcachefs/ec.h 12
-rw-r--r-- libbcachefs/extent_update.c 1
-rw-r--r-- libbcachefs/extents.c 32
-rw-r--r-- libbcachefs/extents.h 4
-rw-r--r-- libbcachefs/fs-io.c 22
-rw-r--r-- libbcachefs/fs.c 19
-rw-r--r-- libbcachefs/fsck.c 10
-rw-r--r-- libbcachefs/io.c 98
-rw-r--r-- libbcachefs/io.h 8
-rw-r--r-- libbcachefs/journal.c 4
-rw-r--r-- libbcachefs/journal_io.c 23
-rw-r--r-- libbcachefs/journal_types.h 2
-rw-r--r-- libbcachefs/migrate.c 20
-rw-r--r-- libbcachefs/move.c 68
-rw-r--r-- libbcachefs/movinggc.c 5
-rw-r--r-- libbcachefs/recovery.c 49
-rw-r--r-- libbcachefs/reflink.c 21
-rw-r--r-- libbcachefs/sysfs.c 2
40 files changed, 589 insertions, 441 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 6bdc42aa..14540446 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-5241335413ef160e309fd41ab909532fec656a3a
+fcf8a0889c125511ae841960c73df62237ab05a7
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 42fbc2bc..40d72671 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -301,7 +301,7 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
while (length) {
struct bkey_i_extent *e;
- BKEY_PADDED(k) k;
+ __BKEY_PADDED(k, BKEY_EXTENT_VAL_U64s_MAX) k;
u64 b = sector_to_bucket(ca, physical);
struct disk_reservation res;
unsigned sectors;
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 62ca9b7a..60c2c38b 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -319,9 +319,7 @@ retry:
bch2_trans_update(trans, iter, &a->k_i,
BTREE_TRIGGER_NORUN);
ret = bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- flags);
+ BTREE_INSERT_NOFAIL|flags);
err:
if (ret == -EINTR)
goto retry;
@@ -368,7 +366,7 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags)
unsigned i;
int ret = 0;
- for_each_rw_member(ca, c, i) {
+ for_each_member_device(ca, c, i) {
bch2_dev_alloc_write(c, ca, flags);
if (ret) {
percpu_ref_put(&ca->io_ref);
@@ -575,8 +573,7 @@ static int wait_buckets_available(struct bch_fs *c, struct bch_dev *ca)
if (available > fifo_free(&ca->free_inc) ||
(available &&
- (!fifo_full(&ca->free[RESERVE_BTREE]) ||
- !fifo_full(&ca->free[RESERVE_MOVINGGC]))))
+ !fifo_full(&ca->free[RESERVE_MOVINGGC])))
break;
up_read(&c->gc_lock);
@@ -977,8 +974,7 @@ retry:
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
+ BTREE_INSERT_JOURNAL_RESERVED|
flags);
if (ret == -EINTR)
goto retry;
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 7a92e3d5..dcbe0404 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -204,9 +204,10 @@ success:
static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
{
switch (reserve) {
- case RESERVE_ALLOC:
- return 0;
case RESERVE_BTREE:
+ case RESERVE_BTREE_MOVINGGC:
+ return 0;
+ case RESERVE_MOVINGGC:
return OPEN_BUCKETS_COUNT / 4;
default:
return OPEN_BUCKETS_COUNT / 2;
@@ -263,16 +264,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
goto out;
switch (reserve) {
- case RESERVE_ALLOC:
- if (fifo_pop(&ca->free[RESERVE_BTREE], bucket))
- goto out;
- break;
- case RESERVE_BTREE:
- if (fifo_used(&ca->free[RESERVE_BTREE]) * 2 >=
- ca->free[RESERVE_BTREE].size &&
- fifo_pop(&ca->free[RESERVE_BTREE], bucket))
- goto out;
- break;
+ case RESERVE_BTREE_MOVINGGC:
case RESERVE_MOVINGGC:
if (fifo_pop(&ca->free[RESERVE_MOVINGGC], bucket))
goto out;
@@ -458,16 +450,18 @@ bch2_bucket_alloc_set(struct bch_fs *c,
* it's to a device we don't want:
*/
-static void bucket_alloc_from_stripe(struct bch_fs *c,
- struct open_buckets *ptrs,
- struct write_point *wp,
- struct bch_devs_mask *devs_may_alloc,
- u16 target,
- unsigned erasure_code,
- unsigned nr_replicas,
- unsigned *nr_effective,
- bool *have_cache,
- unsigned flags)
+static enum bucket_alloc_ret
+bucket_alloc_from_stripe(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct write_point *wp,
+ struct bch_devs_mask *devs_may_alloc,
+ u16 target,
+ unsigned erasure_code,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ unsigned flags,
+ struct closure *cl)
{
struct dev_alloc_list devs_sorted;
struct ec_stripe_head *h;
@@ -476,17 +470,21 @@ static void bucket_alloc_from_stripe(struct bch_fs *c,
unsigned i, ec_idx;
if (!erasure_code)
- return;
+ return 0;
if (nr_replicas < 2)
- return;
+ return 0;
if (ec_open_bucket(c, ptrs))
- return;
+ return 0;
- h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1);
+ h = bch2_ec_stripe_head_get(c, target, 0, nr_replicas - 1,
+ wp == &c->copygc_write_point,
+ cl);
+ if (IS_ERR(h))
+ return -PTR_ERR(h);
if (!h)
- return;
+ return 0;
devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
@@ -508,6 +506,7 @@ got_bucket:
atomic_inc(&h->s->pin);
out_put_head:
bch2_ec_stripe_head_put(c, h);
+ return 0;
}
/* Sector allocator */
@@ -585,10 +584,13 @@ open_bucket_add_buckets(struct bch_fs *c,
}
if (!ec_open_bucket(c, ptrs)) {
- bucket_alloc_from_stripe(c, ptrs, wp, &devs,
+ ret = bucket_alloc_from_stripe(c, ptrs, wp, &devs,
target, erasure_code,
nr_replicas, nr_effective,
- have_cache, flags);
+ have_cache, flags, _cl);
+ if (ret == FREELIST_EMPTY ||
+ ret == OPEN_BUCKETS_EMPTY)
+ return ret;
if (*nr_effective >= nr_replicas)
return 0;
}
diff --git a/libbcachefs/alloc_types.h b/libbcachefs/alloc_types.h
index 20705460..1abfff52 100644
--- a/libbcachefs/alloc_types.h
+++ b/libbcachefs/alloc_types.h
@@ -34,14 +34,12 @@ struct bucket_clock {
struct mutex lock;
};
-/* There is one reserve for each type of btree, one for prios and gens
- * and one for moving GC */
enum alloc_reserve {
- RESERVE_ALLOC = -1,
- RESERVE_BTREE = 0,
- RESERVE_MOVINGGC = 1,
- RESERVE_NONE = 2,
- RESERVE_NR = 3,
+ RESERVE_BTREE_MOVINGGC = -2,
+ RESERVE_BTREE = -1,
+ RESERVE_MOVINGGC = 0,
+ RESERVE_NONE = 1,
+ RESERVE_NR = 2,
};
typedef FIFO(long) alloc_fifo;
@@ -89,7 +87,6 @@ struct write_point {
u64 last_used;
unsigned long write_point;
enum bch_data_type type;
- bool is_ec;
/* calculated based on how many pointers we're actually going to use: */
unsigned sectors_free;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index eb5b4080..505777ba 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -510,7 +510,7 @@ enum {
/* misc: */
BCH_FS_FIXED_GENS,
- BCH_FS_ALLOC_WRITTEN,
+ BCH_FS_NEED_ALLOC_WRITE,
BCH_FS_REBUILD_REPLICAS,
BCH_FS_HOLD_BTREE_WRITES,
};
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 9f59c6b3..307d5523 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -634,8 +634,6 @@ struct bch_reservation {
#define BKEY_EXTENT_VAL_U64s_MAX \
(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
-#define BKEY_PADDED(key) __BKEY_PADDED(key, BKEY_EXTENT_VAL_U64s_MAX)
-
/* * Maximum possible size of an entire extent, key + value: */
#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
diff --git a/libbcachefs/bkey_buf.h b/libbcachefs/bkey_buf.h
new file mode 100644
index 00000000..0d7c67a9
--- /dev/null
+++ b/libbcachefs/bkey_buf.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BKEY_BUF_H
+#define _BCACHEFS_BKEY_BUF_H
+
+#include "bcachefs.h"
+
+struct bkey_buf {
+ struct bkey_i *k;
+ u64 onstack[12];
+};
+
+static inline void bch2_bkey_buf_realloc(struct bkey_buf *s,
+ struct bch_fs *c, unsigned u64s)
+{
+ if (s->k == (void *) s->onstack &&
+ u64s > ARRAY_SIZE(s->onstack)) {
+ s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
+ memcpy(s->k, s->onstack, sizeof(s->onstack));
+ }
+}
+
+static inline void bch2_bkey_buf_reassemble(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_s_c k)
+{
+ bch2_bkey_buf_realloc(s, c, k.k->u64s);
+ bkey_reassemble(s->k, k);
+}
+
+static inline void bch2_bkey_buf_copy(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct bkey_i *src)
+{
+ bch2_bkey_buf_realloc(s, c, src->k.u64s);
+ bkey_copy(s->k, src);
+}
+
+static inline void bch2_bkey_buf_unpack(struct bkey_buf *s,
+ struct bch_fs *c,
+ struct btree *b,
+ struct bkey_packed *src)
+{
+ bch2_bkey_buf_realloc(s, c, BKEY_U64s +
+ bkeyp_val_u64s(&b->format, src));
+ bch2_bkey_unpack(b, s->k, src);
+}
+
+static inline void bch2_bkey_buf_init(struct bkey_buf *s)
+{
+ s->k = (void *) s->onstack;
+}
+
+static inline void bch2_bkey_buf_exit(struct bkey_buf *s, struct bch_fs *c)
+{
+ if (s->k != (void *) s->onstack)
+ mempool_free(s->k, &c->large_bkey_pool);
+ s->k = NULL;
+}
+
+#endif /* _BCACHEFS_BKEY_BUF_H */
diff --git a/libbcachefs/bkey_on_stack.h b/libbcachefs/bkey_on_stack.h
deleted file mode 100644
index f607a0cb..00000000
--- a/libbcachefs/bkey_on_stack.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_BKEY_ON_STACK_H
-#define _BCACHEFS_BKEY_ON_STACK_H
-
-#include "bcachefs.h"
-
-struct bkey_on_stack {
- struct bkey_i *k;
- u64 onstack[12];
-};
-
-static inline void bkey_on_stack_realloc(struct bkey_on_stack *s,
- struct bch_fs *c, unsigned u64s)
-{
- if (s->k == (void *) s->onstack &&
- u64s > ARRAY_SIZE(s->onstack)) {
- s->k = mempool_alloc(&c->large_bkey_pool, GFP_NOFS);
- memcpy(s->k, s->onstack, sizeof(s->onstack));
- }
-}
-
-static inline void bkey_on_stack_reassemble(struct bkey_on_stack *s,
- struct bch_fs *c,
- struct bkey_s_c k)
-{
- bkey_on_stack_realloc(s, c, k.k->u64s);
- bkey_reassemble(s->k, k);
-}
-
-static inline void bkey_on_stack_init(struct bkey_on_stack *s)
-{
- s->k = (void *) s->onstack;
-}
-
-static inline void bkey_on_stack_exit(struct bkey_on_stack *s,
- struct bch_fs *c)
-{
- if (s->k != (void *) s->onstack)
- mempool_free(s->k, &c->large_bkey_pool);
- s->k = NULL;
-}
-
-#endif /* _BCACHEFS_BKEY_ON_STACK_H */
diff --git a/libbcachefs/bkey_sort.c b/libbcachefs/bkey_sort.c
index 99e0a401..2e1d9cd6 100644
--- a/libbcachefs/bkey_sort.c
+++ b/libbcachefs/bkey_sort.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "bkey_sort.h"
#include "bset.h"
#include "extents.h"
@@ -187,11 +187,11 @@ bch2_sort_repack_merge(struct bch_fs *c,
bool filter_whiteouts)
{
struct bkey_packed *out = vstruct_last(dst), *k_packed;
- struct bkey_on_stack k;
+ struct bkey_buf k;
struct btree_nr_keys nr;
memset(&nr, 0, sizeof(nr));
- bkey_on_stack_init(&k);
+ bch2_bkey_buf_init(&k);
while ((k_packed = bch2_btree_node_iter_next_all(iter, src))) {
if (filter_whiteouts && bkey_whiteout(k_packed))
@@ -204,7 +204,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
* node; we have to make a copy of the entire key before calling
* normalize
*/
- bkey_on_stack_realloc(&k, c, k_packed->u64s + BKEY_U64s);
+ bch2_bkey_buf_realloc(&k, c, k_packed->u64s + BKEY_U64s);
bch2_bkey_unpack(src, k.k, k_packed);
if (filter_whiteouts &&
@@ -215,7 +215,7 @@ bch2_sort_repack_merge(struct bch_fs *c,
}
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
- bkey_on_stack_exit(&k, c);
+ bch2_bkey_buf_exit(&k, c);
return nr;
}
@@ -315,11 +315,11 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
struct bkey l_unpacked, r_unpacked;
struct bkey_s l, r;
struct btree_nr_keys nr;
- struct bkey_on_stack split;
+ struct bkey_buf split;
unsigned i;
memset(&nr, 0, sizeof(nr));
- bkey_on_stack_init(&split);
+ bch2_bkey_buf_init(&split);
sort_iter_sort(iter, extent_sort_fix_overlapping_cmp);
for (i = 0; i < iter->used;) {
@@ -379,7 +379,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
/*
* r wins, but it overlaps in the middle of l - split l:
*/
- bkey_on_stack_reassemble(&split, c, l.s_c);
+ bch2_bkey_buf_reassemble(&split, c, l.s_c);
bch2_cut_back(bkey_start_pos(r.k), split.k);
bch2_cut_front_s(r.k->p, l);
@@ -398,7 +398,7 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *c, struct bset *dst,
dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
- bkey_on_stack_exit(&split, c);
+ bch2_bkey_buf_exit(&split, c);
return nr;
}
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 09774f56..fda6540b 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_io.h"
#include "btree_iter.h"
@@ -898,10 +899,12 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
struct btree *parent;
struct btree_node_iter node_iter;
struct bkey_packed *k;
- BKEY_PADDED(k) tmp;
+ struct bkey_buf tmp;
struct btree *ret = NULL;
unsigned level = b->c.level;
+ bch2_bkey_buf_init(&tmp);
+
parent = btree_iter_node(iter, level + 1);
if (!parent)
return NULL;
@@ -935,9 +938,9 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (!k)
goto out;
- bch2_bkey_unpack(parent, &tmp.k, k);
+ bch2_bkey_buf_unpack(&tmp, c, parent, k);
- ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+ ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
@@ -957,7 +960,7 @@ struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
if (sib == btree_prev_sib)
btree_node_unlock(iter, level);
- ret = bch2_btree_node_get(c, iter, &tmp.k, level,
+ ret = bch2_btree_node_get(c, iter, tmp.k, level,
SIX_LOCK_intent, _THIS_IP_);
/*
@@ -998,6 +1001,8 @@ out:
bch2_btree_trans_verify_locks(trans);
+ bch2_bkey_buf_exit(&tmp, c);
+
return ret;
}
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 6268ea63..6b06f607 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -8,7 +8,7 @@
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "bkey_methods.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_locking.h"
#include "btree_update_interior.h"
#include "btree_io.h"
@@ -132,6 +132,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
ptr->gen)) {
g2->_mark.gen = g->_mark.gen = ptr->gen;
g2->gen_valid = g->gen_valid = true;
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
}
if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
@@ -145,6 +146,7 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
g2->_mark.dirty_sectors = 0;
g2->_mark.cached_sectors = 0;
set_bit(BCH_FS_FIXED_GENS, &c->flags);
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
}
}
}
@@ -233,7 +235,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
if (max_stale > 64)
bch2_btree_node_rewrite(c, iter,
b->data->keys.seq,
- BTREE_INSERT_USE_RESERVE|
BTREE_INSERT_NOWAIT|
BTREE_INSERT_GC_LOCK_HELD);
else if (!bch2_btree_gc_rewrite_disabled &&
@@ -268,10 +269,12 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
struct btree_and_journal_iter iter;
struct bkey_s_c k;
struct bpos next_node_start = b->data->min_key;
+ struct bkey_buf tmp;
u8 max_stale = 0;
int ret = 0;
bch2_btree_and_journal_iter_init_node_iter(&iter, journal_keys, b);
+ bch2_bkey_buf_init(&tmp);
while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
bch2_bkey_debugcheck(c, b, k);
@@ -285,10 +288,9 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
if (b->c.level) {
struct btree *child;
- BKEY_PADDED(k) tmp;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ k = bkey_i_to_s_c(tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
@@ -300,7 +302,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
break;
if (b->c.level > target_depth) {
- child = bch2_btree_node_get_noiter(c, &tmp.k,
+ child = bch2_btree_node_get_noiter(c, tmp.k,
b->c.btree_id, b->c.level - 1);
ret = PTR_ERR_OR_ZERO(child);
if (ret)
@@ -318,6 +320,7 @@ static int bch2_gc_btree_init_recurse(struct bch_fs *c, struct btree *b,
}
}
+ bch2_bkey_buf_exit(&tmp, c);
return ret;
}
@@ -570,7 +573,7 @@ static int bch2_gc_done(struct bch_fs *c,
fsck_err(c, _msg ": got %llu, should be %llu" \
, ##__VA_ARGS__, dst->_f, src->_f); \
dst->_f = src->_f; \
- ret = 1; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_stripe_field(_f, _msg, ...) \
if (dst->_f != src->_f) { \
@@ -581,7 +584,7 @@ static int bch2_gc_done(struct bch_fs *c,
dst->_f, src->_f); \
dst->_f = src->_f; \
dst->dirty = true; \
- ret = 1; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_bucket_field(_f) \
if (dst->b[b].mark._f != src->b[b].mark._f) { \
@@ -592,7 +595,7 @@ static int bch2_gc_done(struct bch_fs *c,
bch2_data_types[dst->b[b].mark.data_type],\
dst->b[b].mark._f, src->b[b].mark._f); \
dst->b[b]._mark._f = src->b[b].mark._f; \
- ret = 1; \
+ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \
}
#define copy_dev_field(_f, _msg, ...) \
copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -930,10 +933,10 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
int ret = 0;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@@ -942,7 +945,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
if (gc_btree_gens_key(c, k)) {
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
bch2_extent_normalize(c, bkey_i_to_s(sk.k));
bch2_btree_iter_set_pos(iter, bkey_start_pos(&sk.k->k));
@@ -962,7 +965,7 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
}
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return ret;
}
@@ -1074,7 +1077,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
}
if (bch2_keylist_realloc(&keylist, NULL, 0,
- (BKEY_U64s + BKEY_EXTENT_U64s_MAX) * nr_old_nodes)) {
+ BKEY_BTREE_PTR_U64s_MAX * nr_old_nodes)) {
trace_btree_gc_coalesce_fail(c,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC);
return;
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 4dde972d..768fc85e 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1320,12 +1320,13 @@ static void bch2_btree_node_write_error(struct bch_fs *c,
struct btree_write_bio *wbio)
{
struct btree *b = wbio->wbio.bio.bi_private;
- __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
+ struct bkey_buf k;
struct bch_extent_ptr *ptr;
struct btree_trans trans;
struct btree_iter *iter;
int ret;
+ bch2_bkey_buf_init(&k);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_node_iter(&trans, b->c.btree_id, b->key.k.p,
@@ -1344,21 +1345,22 @@ retry:
BUG_ON(!btree_node_hashed(b));
- bkey_copy(&tmp.k, &b->key);
+ bch2_bkey_buf_copy(&k, c, &b->key);
- bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr,
+ bch2_bkey_drop_ptrs(bkey_i_to_s(k.k), ptr,
bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev));
- if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(&tmp.k)))
+ if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(k.k)))
goto err;
- ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
+ ret = bch2_btree_node_update_key(c, iter, b, k.k);
if (ret == -EINTR)
goto retry;
if (ret)
goto err;
out:
bch2_trans_exit(&trans);
+ bch2_bkey_buf_exit(&k, c);
bio_put(&wbio->wbio.bio);
btree_node_write_done(c, b);
return;
@@ -1476,7 +1478,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
struct bset *i;
struct btree_node *bn = NULL;
struct btree_node_entry *bne = NULL;
- BKEY_PADDED(key) k;
+ struct bkey_buf k;
struct bch_extent_ptr *ptr;
struct sort_iter sort_iter;
struct nonce nonce;
@@ -1487,6 +1489,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
bool validate_before_checksum = false;
void *data;
+ bch2_bkey_buf_init(&k);
+
if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags))
return;
@@ -1695,15 +1699,16 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
* just make all btree node writes FUA to keep things sane.
*/
- bkey_copy(&k.key, &b->key);
+ bch2_bkey_buf_copy(&k, c, &b->key);
- bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr)
+ bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(k.k)), ptr)
ptr->offset += b->written;
b->written += sectors_to_write;
/* XXX: submitting IO with btree locks held: */
- bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, &k.key);
+ bch2_submit_wbio_replicas(&wbio->wbio, c, BCH_DATA_btree, k.k);
+ bch2_bkey_buf_exit(&k, c);
return;
err:
set_btree_node_noevict(b);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 8c35e39e..4d825cac 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2,6 +2,7 @@
#include "bcachefs.h"
#include "bkey_methods.h"
+#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_iter.h"
#include "btree_key_cache.h"
@@ -1048,27 +1049,31 @@ static void btree_iter_prefetch(struct btree_iter *iter)
struct btree_iter_level *l = &iter->l[iter->level];
struct btree_node_iter node_iter = l->iter;
struct bkey_packed *k;
- BKEY_PADDED(k) tmp;
+ struct bkey_buf tmp;
unsigned nr = test_bit(BCH_FS_STARTED, &c->flags)
? (iter->level > 1 ? 0 : 2)
: (iter->level > 1 ? 1 : 16);
bool was_locked = btree_node_locked(iter, iter->level);
+ bch2_bkey_buf_init(&tmp);
+
while (nr) {
if (!bch2_btree_node_relock(iter, iter->level))
- return;
+ break;
bch2_btree_node_iter_advance(&node_iter, l->b);
k = bch2_btree_node_iter_peek(&node_iter, l->b);
if (!k)
break;
- bch2_bkey_unpack(l->b, &tmp.k, k);
- bch2_btree_node_prefetch(c, iter, &tmp.k, iter->level - 1);
+ bch2_bkey_buf_unpack(&tmp, c, l->b, k);
+ bch2_btree_node_prefetch(c, iter, tmp.k, iter->level - 1);
}
if (!was_locked)
btree_node_unlock(iter, iter->level);
+
+ bch2_bkey_buf_exit(&tmp, c);
}
static noinline void btree_node_mem_ptr_set(struct btree_iter *iter,
@@ -1100,30 +1105,34 @@ static __always_inline int btree_iter_down(struct btree_iter *iter,
struct btree *b;
unsigned level = iter->level - 1;
enum six_lock_type lock_type = __btree_lock_want(iter, level);
- BKEY_PADDED(k) tmp;
+ struct bkey_buf tmp;
+ int ret;
EBUG_ON(!btree_node_locked(iter, iter->level));
- bch2_bkey_unpack(l->b, &tmp.k,
+ bch2_bkey_buf_init(&tmp);
+ bch2_bkey_buf_unpack(&tmp, c, l->b,
bch2_btree_node_iter_peek(&l->iter, l->b));
- b = bch2_btree_node_get(c, iter, &tmp.k, level, lock_type, trace_ip);
- if (unlikely(IS_ERR(b)))
- return PTR_ERR(b);
+ b = bch2_btree_node_get(c, iter, tmp.k, level, lock_type, trace_ip);
+ ret = PTR_ERR_OR_ZERO(b);
+ if (unlikely(ret))
+ goto err;
mark_btree_node_locked(iter, level, lock_type);
btree_iter_node_set(iter, b);
- if (tmp.k.k.type == KEY_TYPE_btree_ptr_v2 &&
- unlikely(b != btree_node_mem_ptr(&tmp.k)))
+ if (tmp.k->k.type == KEY_TYPE_btree_ptr_v2 &&
+ unlikely(b != btree_node_mem_ptr(tmp.k)))
btree_node_mem_ptr_set(iter, level + 1, b);
if (iter->flags & BTREE_ITER_PREFETCH)
btree_iter_prefetch(iter);
iter->level = level;
-
- return 0;
+err:
+ bch2_bkey_buf_exit(&tmp, c);
+ return ret;
}
static void btree_iter_up(struct btree_iter *iter)
@@ -2124,9 +2133,12 @@ static struct btree_iter *__btree_trans_get_iter(struct btree_trans *trans,
iter->flags &= ~BTREE_ITER_USER_FLAGS;
iter->flags |= flags & BTREE_ITER_USER_FLAGS;
- if (iter->flags & BTREE_ITER_INTENT)
- bch2_btree_iter_upgrade(iter, 1);
- else
+ if (iter->flags & BTREE_ITER_INTENT) {
+ if (!iter->locks_want) {
+ __bch2_btree_iter_unlock(iter);
+ iter->locks_want = 1;
+ }
+ } else
bch2_btree_iter_downgrade(iter);
BUG_ON(iter->btree_id != btree_id);
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index 1a557b75..4357aefd 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -349,8 +349,6 @@ retry:
BTREE_INSERT_NOUNLOCK|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_JOURNAL_RESERVED|
BTREE_INSERT_JOURNAL_RECLAIM);
err:
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index dc7de271..631bf469 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -57,7 +57,7 @@ struct btree_write {
struct btree_alloc {
struct open_buckets ob;
- BKEY_PADDED(k);
+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX);
};
struct btree_bkey_cached_common {
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index adb07043..a2513808 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -20,7 +20,6 @@ enum btree_insert_flags {
__BTREE_INSERT_NOCHECK_RW,
__BTREE_INSERT_LAZY_RW,
__BTREE_INSERT_USE_RESERVE,
- __BTREE_INSERT_USE_ALLOC_RESERVE,
__BTREE_INSERT_JOURNAL_REPLAY,
__BTREE_INSERT_JOURNAL_RESERVED,
__BTREE_INSERT_JOURNAL_RECLAIM,
@@ -43,7 +42,6 @@ enum btree_insert_flags {
/* for copygc, or when merging btree nodes */
#define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
-#define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
/* Insert is for journal replay - don't get journal reservations: */
#define BTREE_INSERT_JOURNAL_REPLAY (1 << __BTREE_INSERT_JOURNAL_REPLAY)
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 8f96756b..5bb65329 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -195,21 +195,18 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
{
struct write_point *wp;
struct btree *b;
- BKEY_PADDED(k) tmp;
+ __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
struct open_buckets ob = { .nr = 0 };
struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
unsigned nr_reserve;
enum alloc_reserve alloc_reserve;
- if (flags & BTREE_INSERT_USE_ALLOC_RESERVE) {
+ if (flags & BTREE_INSERT_USE_RESERVE) {
nr_reserve = 0;
- alloc_reserve = RESERVE_ALLOC;
- } else if (flags & BTREE_INSERT_USE_RESERVE) {
- nr_reserve = BTREE_NODE_RESERVE / 2;
- alloc_reserve = RESERVE_BTREE;
+ alloc_reserve = RESERVE_BTREE_MOVINGGC;
} else {
nr_reserve = BTREE_NODE_RESERVE;
- alloc_reserve = RESERVE_NONE;
+ alloc_reserve = RESERVE_BTREE;
}
mutex_lock(&c->btree_reserve_cache_lock);
@@ -577,8 +574,6 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_trans_init(&trans, c, 0, 512);
ret = __bch2_trans_do(&trans, &as->disk_res, &journal_seq,
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE|
BTREE_INSERT_NOCHECK_RW|
BTREE_INSERT_JOURNAL_RECLAIM|
BTREE_INSERT_JOURNAL_RESERVED,
@@ -1232,6 +1227,9 @@ static void btree_split_insert_keys(struct btree_update *as, struct btree *b,
src = n;
}
+ /* Also clear out the unwritten whiteouts area: */
+ b->whiteout_u64s = 0;
+
i->u64s = cpu_to_le16((u64 *) dst - i->_data);
set_btree_bset_end(b, b->set);
@@ -1457,15 +1455,6 @@ int bch2_btree_split_leaf(struct bch_fs *c, struct btree_iter *iter,
struct btree_update *as;
struct closure cl;
int ret = 0;
- struct btree_insert_entry *i;
-
- /*
- * We already have a disk reservation and open buckets pinned; this
- * allocation must not block:
- */
- trans_for_each_update(trans, i)
- if (btree_node_type_needs_gc(i->iter->btree_id))
- flags |= BTREE_INSERT_USE_RESERVE;
closure_init_stack(&cl);
@@ -1926,10 +1915,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter,
retry:
as = bch2_btree_update_start(iter->trans, iter->btree_id,
parent ? btree_update_reserve_required(c, parent) : 0,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
- BTREE_INSERT_USE_ALLOC_RESERVE,
- &cl);
+ BTREE_INSERT_NOFAIL, &cl);
if (IS_ERR(as)) {
ret = PTR_ERR(as);
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 64734f91..c490df47 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -869,8 +869,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
trans_trigger_run = false;
trans_for_each_update(trans, i) {
- if (unlikely(i->iter->uptodate > BTREE_ITER_NEED_PEEK &&
- (ret = bch2_btree_iter_traverse(i->iter)))) {
+ ret = bch2_btree_iter_traverse(i->iter);
+ if (unlikely(ret)) {
trace_trans_restart_traverse(trans->ip);
goto out;
}
@@ -879,8 +879,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
* We're not using bch2_btree_iter_upgrade here because
* we know trans->nounlock can't be set:
*/
- if (unlikely(i->iter->locks_want < 1 &&
- !__bch2_btree_iter_upgrade(i->iter, 1))) {
+ if (unlikely(!btree_node_intent_locked(i->iter, i->iter->level) &&
+ !__bch2_btree_iter_upgrade(i->iter, i->iter->level + 1))) {
trace_trans_restart_upgrade(trans->ip);
ret = -EINTR;
goto out;
@@ -1084,8 +1084,7 @@ int bch2_btree_delete_at(struct btree_trans *trans,
bch2_trans_update(trans, iter, &k, 0);
return bch2_trans_commit(trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|flags);
+ BTREE_INSERT_NOFAIL|flags);
}
int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 1934b845..8bbf958d 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -2192,7 +2192,7 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
ca->mi.bucket_size / c->opts.btree_node_size);
/* XXX: these should be tunable */
size_t reserve_none = max_t(size_t, 1, nbuckets >> 9);
- size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 7);
+ size_t copygc_reserve = max_t(size_t, 2, nbuckets >> 6);
size_t free_inc_nr = max(max_t(size_t, 1, nbuckets >> 12),
btree_reserve * 2);
bool resize = ca->buckets[0] != NULL;
@@ -2209,7 +2209,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
!(buckets_nouse = kvpmalloc(BITS_TO_LONGS(nbuckets) *
sizeof(unsigned long),
GFP_KERNEL|__GFP_ZERO)) ||
- !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_MOVINGGC],
copygc_reserve, GFP_KERNEL) ||
!init_fifo(&free[RESERVE_NONE], reserve_none, GFP_KERNEL) ||
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index aebf46bb..f63651d2 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -336,8 +336,19 @@ static int attempt_compress(struct bch_fs *c,
ZSTD_CCtx *ctx = ZSTD_initCCtx(workspace,
ZSTD_CCtxWorkspaceBound(c->zstd_params.cParams));
+ /*
+ * ZSTD requires that when we decompress we pass in the exact
+ * compressed size - rounding it up to the nearest sector
+ * doesn't work, so we use the first 4 bytes of the buffer for
+ * that.
+ *
+ * Additionally, the ZSTD code seems to have a bug where it will
+ * write just past the end of the buffer - so subtract a fudge
+ * factor (7 bytes) from the dst buffer size to account for
+ * that.
+ */
size_t len = ZSTD_compressCCtx(ctx,
- dst + 4, dst_len - 4,
+ dst + 4, dst_len - 4 - 7,
src, src_len,
c->zstd_params);
if (ZSTD_isError(len))
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 8f39c4de..1c08f563 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -4,7 +4,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "bset.h"
#include "btree_gc.h"
#include "btree_update.h"
@@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
return false;
}
+/* Stripe bufs: */
+
+static void ec_stripe_buf_free(struct ec_stripe_buf *stripe)
+{
+ unsigned i;
+
+ for (i = 0; i < stripe->key.v.nr_blocks; i++) {
+ kvpfree(stripe->data[i], stripe->size << 9);
+ stripe->data[i] = NULL;
+ }
+}
+
+static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe)
+{
+ unsigned i;
+
+ memset(stripe->valid, 0xFF, sizeof(stripe->valid));
+
+ for (i = 0; i < stripe->key.v.nr_blocks; i++) {
+ stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL);
+ if (!stripe->data[i])
+ goto err;
+ }
+
+ return 0;
+err:
+ ec_stripe_buf_free(stripe);
+ return -ENOMEM;
+}
+
/* Checksumming: */
static void ec_generate_checksums(struct ec_stripe_buf *buf)
@@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf)
raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
}
-static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr)
-{
- return nr - bitmap_weight(buf->valid, nr);
-}
-
static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
{
- return __ec_nr_failed(buf, buf->key.v.nr_blocks);
+ return buf->key.v.nr_blocks -
+ bitmap_weight(buf->valid, buf->key.v.nr_blocks);
}
static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
@@ -757,10 +783,10 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
struct btree_iter *iter;
struct bkey_s_c k;
struct bkey_s_extent e;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
int ret = 0, dev, idx;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
/* XXX this doesn't support the reflink btree */
@@ -787,7 +813,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
dev = s->key.v.ptrs[idx].dev;
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
e = bkey_i_to_s_extent(sk.k);
bch2_bkey_drop_ptrs(e.s, ptr, ptr->dev != dev);
@@ -800,8 +826,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
bch2_trans_update(&trans, iter, sk.k, 0);
ret = bch2_trans_commit(&trans, NULL, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE);
+ BTREE_INSERT_NOFAIL);
if (ret == -EINTR)
ret = 0;
if (ret)
@@ -809,7 +834,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
}
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return ret;
}
@@ -823,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s)
struct open_bucket *ob;
struct bkey_i *k;
struct stripe *m;
- struct bch_stripe *v = &s->stripe.key.v;
+ struct bch_stripe *v = &s->new_stripe.key.v;
unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
- struct closure cl;
int ret;
BUG_ON(s->h->s == s);
- closure_init_stack(&cl);
+ closure_sync(&s->iodone);
if (s->err) {
if (s->err != -EROFS)
@@ -838,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}
+ if (s->have_existing_stripe) {
+ ec_validate_checksums(c, &s->existing_stripe);
+
+ if (ec_do_recov(c, &s->existing_stripe)) {
+ bch_err(c, "error creating stripe: error reading existing stripe");
+ goto err;
+ }
+
+ for (i = 0; i < nr_data; i++)
+ if (stripe_blockcount_get(&s->existing_stripe.key.v, i))
+ swap(s->new_stripe.data[i],
+ s->existing_stripe.data[i]);
+
+ ec_stripe_buf_free(&s->existing_stripe);
+ }
+
BUG_ON(!s->allocated);
if (!percpu_ref_tryget(&c->writes))
@@ -846,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s)
BUG_ON(bitmap_weight(s->blocks_allocated,
s->blocks.nr) != s->blocks.nr);
- ec_generate_ec(&s->stripe);
+ ec_generate_ec(&s->new_stripe);
- ec_generate_checksums(&s->stripe);
+ ec_generate_checksums(&s->new_stripe);
/* write p/q: */
for (i = nr_data; i < v->nr_blocks; i++)
- ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl);
-
- closure_sync(&cl);
+ ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
+ closure_sync(&s->iodone);
- for (i = nr_data; i < v->nr_blocks; i++)
- if (!test_bit(i, s->stripe.valid)) {
- bch_err(c, "error creating stripe: error writing redundancy buckets");
- goto err_put_writes;
- }
+ if (ec_nr_failed(&s->new_stripe)) {
+ bch_err(c, "error creating stripe: error writing redundancy buckets");
+ goto err_put_writes;
+ }
- ret = s->existing_stripe
- ? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
+ ret = s->have_existing_stripe
+ ? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i,
&s->res, NULL, BTREE_INSERT_NOFAIL)
- : ec_stripe_bkey_insert(c, s, &s->stripe.key);
+ : ec_stripe_bkey_insert(c, s, &s->new_stripe.key);
if (ret) {
bch_err(c, "error creating stripe: error creating stripe key");
goto err_put_writes;
}
for_each_keylist_key(&s->keys, k) {
- ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
+ ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
if (ret) {
bch_err(c, "error creating stripe: error %i updating pointers", ret);
break;
@@ -880,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
}
spin_lock(&c->ec_stripes_heap_lock);
- m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
+ m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset);
#if 0
pr_info("created a %s stripe %llu",
- s->existing_stripe ? "existing" : "new",
+ s->have_existing_stripe ? "existing" : "new",
s->stripe.key.k.p.offset);
#endif
BUG_ON(m->on_heap);
- bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
+ bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
spin_unlock(&c->ec_stripes_heap_lock);
err_put_writes:
percpu_ref_put(&c->writes);
@@ -903,8 +941,9 @@ err:
bch2_keylist_free(&s->keys, s->inline_keys);
- for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
- kvpfree(s->stripe.data[i], s->stripe.size << 9);
+ ec_stripe_buf_free(&s->existing_stripe);
+ ec_stripe_buf_free(&s->new_stripe);
+ closure_debug_destroy(&s->iodone);
kfree(s);
}
@@ -981,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
ca = bch_dev_bkey_exists(c, ob->ptr.dev);
offset = ca->mi.bucket_size - ob->sectors_free;
- return ob->ec->stripe.data[ob->ec_idx] + (offset << 9);
+ return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
}
void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
@@ -1088,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c,
static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
{
struct ec_stripe_new *s;
- unsigned i;
lockdep_assert_held(&h->lock);
@@ -1097,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
return -ENOMEM;
mutex_init(&s->lock);
+ closure_init(&s->iodone, NULL);
atomic_set(&s->pin, 1);
s->c = c;
s->h = h;
@@ -1106,32 +1145,20 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
bch2_keylist_init(&s->keys, s->inline_keys);
- s->stripe.offset = 0;
- s->stripe.size = h->blocksize;
- memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
+ s->new_stripe.offset = 0;
+ s->new_stripe.size = h->blocksize;
- ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
+ ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
s->nr_parity, h->blocksize);
- for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
- s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
- if (!s->stripe.data[i])
- goto err;
- }
-
h->s = s;
-
return 0;
-err:
- for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
- kvpfree(s->stripe.data[i], s->stripe.size << 9);
- kfree(s);
- return -ENOMEM;
}
static struct ec_stripe_head *
ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
- unsigned algo, unsigned redundancy)
+ unsigned algo, unsigned redundancy,
+ bool copygc)
{
struct ec_stripe_head *h;
struct bch_dev *ca;
@@ -1147,6 +1174,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned target,
h->target = target;
h->algo = algo;
h->redundancy = redundancy;
+ h->copygc = copygc;
rcu_read_lock();
h->devs = target_rw_devs(c, BCH_DATA_user, target);
@@ -1178,9 +1206,10 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
}
struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
- unsigned target,
- unsigned algo,
- unsigned redundancy)
+ unsigned target,
+ unsigned algo,
+ unsigned redundancy,
+ bool copygc)
{
struct ec_stripe_head *h;
@@ -1191,21 +1220,21 @@ struct ec_stripe_head *__bch2_ec_stripe_head_get(struct bch_fs *c,
list_for_each_entry(h, &c->ec_stripe_head_list, list)
if (h->target == target &&
h->algo == algo &&
- h->redundancy == redundancy) {
+ h->redundancy == redundancy &&
+ h->copygc == copygc) {
mutex_lock(&h->lock);
goto found;
}
- h = ec_new_stripe_head_alloc(c, target, algo, redundancy);
+ h = ec_new_stripe_head_alloc(c, target, algo, redundancy, copygc);
found:
mutex_unlock(&c->ec_stripe_head_lock);
return h;
}
-/*
- * XXX: use a higher watermark for allocating open buckets here:
- */
-static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
+static enum bucket_alloc_ret
+new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
+ struct closure *cl)
{
struct bch_devs_mask devs;
struct open_bucket *ob;
@@ -1213,12 +1242,12 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
min_t(unsigned, h->nr_active_devs,
BCH_BKEY_PTRS_MAX) - h->redundancy;
bool have_cache = true;
- int ret = 0;
+ enum bucket_alloc_ret ret = ALLOC_SUCCESS;
devs = h->devs;
for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
- __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
+ __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
--nr_data;
}
@@ -1242,9 +1271,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
h->redundancy,
&nr_have,
&have_cache,
- RESERVE_NONE,
+ h->copygc
+ ? RESERVE_MOVINGGC
+ : RESERVE_NONE,
0,
- NULL);
+ cl);
if (ret)
goto err;
}
@@ -1258,9 +1289,11 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
nr_data,
&nr_have,
&have_cache,
- RESERVE_NONE,
+ h->copygc
+ ? RESERVE_MOVINGGC
+ : RESERVE_NONE,
0,
- NULL);
+ cl);
if (ret)
goto err;
}
@@ -1326,64 +1359,84 @@ static int get_stripe_key(struct bch_fs *c, u64 idx, struct ec_stripe_buf *strip
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
unsigned target,
unsigned algo,
- unsigned redundancy)
+ unsigned redundancy,
+ bool copygc,
+ struct closure *cl)
{
- struct closure cl;
struct ec_stripe_head *h;
struct open_bucket *ob;
unsigned i, data_idx = 0;
s64 idx;
int ret;
- closure_init_stack(&cl);
-
- h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
- if (!h)
+ h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
+ if (!h) {
+ bch_err(c, "no stripe head");
return NULL;
+ }
if (!h->s) {
if (ec_new_stripe_alloc(c, h)) {
bch2_ec_stripe_head_put(c, h);
+ bch_err(c, "failed to allocate new stripe");
return NULL;
}
idx = get_existing_stripe(c, target, algo, redundancy);
if (idx >= 0) {
- h->s->existing_stripe = true;
- h->s->existing_stripe_idx = idx;
- if (get_stripe_key(c, idx, &h->s->stripe)) {
- /* btree error */
+ h->s->have_existing_stripe = true;
+ ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+ if (ret) {
+ bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
+ bch2_ec_stripe_head_put(c, h);
+ return NULL;
+ }
+
+ if (ec_stripe_buf_alloc(&h->s->existing_stripe)) {
+ /*
+ * this is a problem: we have deleted from the
+ * stripes heap already
+ */
BUG();
}
- for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++)
- if (stripe_blockcount_get(&h->s->stripe.key.v, i)) {
+ for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
+ if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i))
__set_bit(i, h->s->blocks_allocated);
- ec_block_io(c, &h->s->stripe, READ, i, &cl);
- }
+
+ ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+ }
+
+ bkey_copy(&h->s->new_stripe.key.k_i,
+ &h->s->existing_stripe.key.k_i);
+ }
+
+ if (ec_stripe_buf_alloc(&h->s->new_stripe)) {
+ BUG();
}
}
if (!h->s->allocated) {
- if (!h->s->existing_stripe &&
+ if (!h->s->have_existing_stripe &&
!h->s->res.sectors) {
ret = bch2_disk_reservation_get(c, &h->s->res,
- h->blocksize,
- h->s->nr_parity, 0);
+ h->blocksize,
+ h->s->nr_parity, 0);
if (ret) {
- /* What should we do here? */
- bch_err(c, "unable to create new stripe: %i", ret);
+ /*
+ * This means we need to wait for copygc to
+ * empty out buckets from existing stripes:
+ */
bch2_ec_stripe_head_put(c, h);
h = NULL;
goto out;
-
}
-
}
- if (new_stripe_alloc_buckets(c, h)) {
+ ret = new_stripe_alloc_buckets(c, h, cl);
+ if (ret) {
bch2_ec_stripe_head_put(c, h);
- h = NULL;
+ h = ERR_PTR(-ret);
goto out;
}
@@ -1392,19 +1445,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
h->s->nr_data, data_idx);
BUG_ON(data_idx >= h->s->nr_data);
- h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
+ h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
h->s->data_block_idx[i] = data_idx;
data_idx++;
}
open_bucket_for_each(c, &h->s->parity, ob, i)
- h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
+ h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
//pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
h->s->allocated = true;
}
out:
- closure_sync(&cl);
return h;
}
diff --git a/libbcachefs/ec.h b/libbcachefs/ec.h
index 450bb1a1..97a263cf 100644
--- a/libbcachefs/ec.h
+++ b/libbcachefs/ec.h
@@ -88,6 +88,7 @@ struct ec_stripe_new {
struct ec_stripe_head *h;
struct mutex lock;
struct list_head list;
+ struct closure iodone;
/* counts in flight writes, stripe is created when pin == 0 */
atomic_t pin;
@@ -98,8 +99,7 @@ struct ec_stripe_new {
u8 nr_parity;
bool allocated;
bool pending;
- bool existing_stripe;
- u64 existing_stripe_idx;
+ bool have_existing_stripe;
unsigned long blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
@@ -111,7 +111,8 @@ struct ec_stripe_new {
struct keylist keys;
u64 inline_keys[BKEY_U64s * 8];
- struct ec_stripe_buf stripe;
+ struct ec_stripe_buf new_stripe;
+ struct ec_stripe_buf existing_stripe;
};
struct ec_stripe_head {
@@ -121,6 +122,7 @@ struct ec_stripe_head {
unsigned target;
unsigned algo;
unsigned redundancy;
+ bool copygc;
struct bch_devs_mask devs;
unsigned nr_active_devs;
@@ -145,8 +147,8 @@ void bch2_ec_bucket_cancel(struct bch_fs *, struct open_bucket *);
int bch2_ec_stripe_new_alloc(struct bch_fs *, struct ec_stripe_head *);
void bch2_ec_stripe_head_put(struct bch_fs *, struct ec_stripe_head *);
-struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
- unsigned, unsigned);
+struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *,
+ unsigned, unsigned, unsigned, bool, struct closure *);
void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
diff --git a/libbcachefs/extent_update.c b/libbcachefs/extent_update.c
index fd011df3..1faca4bc 100644
--- a/libbcachefs/extent_update.c
+++ b/libbcachefs/extent_update.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "bkey_on_stack.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 828ccf07..c0ae3123 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -665,7 +665,7 @@ bool bch2_bkey_is_incompressible(struct bkey_s_c k)
}
bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
- unsigned nr_replicas)
+ unsigned nr_replicas, bool compressed)
{
struct btree_trans trans;
struct btree_iter *iter;
@@ -683,7 +683,8 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
break;
- if (nr_replicas > bch2_bkey_nr_ptrs_fully_allocated(k)) {
+ if (nr_replicas > bch2_bkey_replicas(c, k) ||
+ (!compressed && bch2_bkey_sectors_compressed(k))) {
ret = false;
break;
}
@@ -693,6 +694,33 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size,
return ret;
}
+unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned replicas = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.ptr.cached)
+ continue;
+
+ if (p.has_ec) {
+ struct stripe *s =
+ genradix_ptr(&c->stripes[0], p.ec.idx);
+
+ WARN_ON(!s);
+ if (s)
+ replicas += s->nr_redundant;
+ }
+
+ replicas++;
+
+ }
+
+ return replicas;
+}
+
static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
struct extent_ptr_decoded p)
{
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 74c7bb8f..ebe0a04c 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -538,7 +538,9 @@ unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
bool bch2_bkey_is_incompressible(struct bkey_s_c);
unsigned bch2_bkey_sectors_compressed(struct bkey_s_c);
-bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned);
+bool bch2_check_range_allocated(struct bch_fs *, struct bpos, u64, unsigned, bool);
+
+unsigned bch2_bkey_replicas(struct bch_fs *, struct bkey_s_c);
unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c);
void bch2_bkey_mark_replicas_cached(struct bch_fs *, struct bkey_s,
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 53c6660e..959eff4c 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -3,7 +3,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "clock.h"
@@ -791,7 +791,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
struct readpages_iter *readpages_iter)
{
struct bch_fs *c = trans->c;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
int flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE;
int ret = 0;
@@ -799,7 +799,7 @@ static void bchfs_read(struct btree_trans *trans, struct btree_iter *iter,
rbio->c = c;
rbio->start_time = local_clock();
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
retry:
while (1) {
struct bkey_s_c k;
@@ -817,7 +817,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
ret = bch2_read_indirect_extent(trans,
&offset_into_extent, &sk);
@@ -862,7 +862,7 @@ retry:
bio_endio(&rbio->bio);
}
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
}
void bch2_readahead(struct readahead_control *ractl)
@@ -1863,7 +1863,9 @@ static long bch2_dio_write_loop(struct dio_write *dio)
dio->op.opts.data_replicas, 0);
if (unlikely(ret) &&
!bch2_check_range_allocated(c, dio->op.pos,
- bio_sectors(bio), dio->op.opts.data_replicas))
+ bio_sectors(bio),
+ dio->op.opts.data_replicas,
+ dio->op.opts.compression != 0))
goto err;
task_io_account_write(bio->bi_iter.bi_size);
@@ -2414,7 +2416,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct address_space *mapping = inode->v.i_mapping;
- struct bkey_on_stack copy;
+ struct bkey_buf copy;
struct btree_trans trans;
struct btree_iter *src, *dst;
loff_t shift, new_size;
@@ -2424,7 +2426,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
if ((offset | len) & (block_bytes(c) - 1))
return -EINVAL;
- bkey_on_stack_init(&copy);
+ bch2_bkey_buf_init(&copy);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 256);
/*
@@ -2512,7 +2514,7 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
bkey_cmp(k.k->p, POS(inode->v.i_ino, offset >> 9)) <= 0)
break;
reassemble:
- bkey_on_stack_reassemble(&copy, c, k);
+ bch2_bkey_buf_reassemble(&copy, c, k);
if (insert &&
bkey_cmp(bkey_start_pos(k.k), move_pos) < 0)
@@ -2589,7 +2591,7 @@ bkey_err:
}
err:
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&copy, c);
+ bch2_bkey_buf_exit(&copy, c);
bch2_pagecache_block_put(&inode->ei_pagecache_lock);
inode_unlock(&inode->v);
return ret;
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index e3edca4d..9ce03172 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -3,7 +3,7 @@
#include "bcachefs.h"
#include "acl.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "buckets.h"
#include "chardev.h"
@@ -886,7 +886,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_on_stack cur, prev;
+ struct bkey_buf cur, prev;
struct bpos end = POS(ei->v.i_ino, (start + len) >> 9);
unsigned offset_into_extent, sectors;
bool have_extent = false;
@@ -899,8 +899,8 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info,
if (start + len < start)
return -EINVAL;
- bkey_on_stack_init(&cur);
- bkey_on_stack_init(&prev);
+ bch2_bkey_buf_init(&cur);
+ bch2_bkey_buf_init(&prev);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -919,7 +919,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
- bkey_on_stack_reassemble(&cur, c, k);
+ bch2_bkey_buf_reassemble(&cur, c, k);
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, &cur);
@@ -927,7 +927,7 @@ retry:
break;
k = bkey_i_to_s_c(cur.k);
- bkey_on_stack_realloc(&prev, c, k.k->u64s);
+ bch2_bkey_buf_realloc(&prev, c, k.k->u64s);
sectors = min(sectors, k.k->size - offset_into_extent);
@@ -961,8 +961,8 @@ retry:
FIEMAP_EXTENT_LAST);
ret = bch2_trans_exit(&trans) ?: ret;
- bkey_on_stack_exit(&cur, c);
- bkey_on_stack_exit(&prev, c);
+ bch2_bkey_buf_exit(&cur, c);
+ bch2_bkey_buf_exit(&prev, c);
return ret < 0 ? ret : 0;
}
@@ -1007,10 +1007,7 @@ static const struct file_operations bch_file_operations = {
.open = generic_file_open,
.fsync = bch2_fsync,
.splice_read = generic_file_splice_read,
- /*
- * Broken, on v5.3:
.splice_write = iter_file_splice_write,
- */
.fallocate = bch2_fallocate_dispatch,
.unlocked_ioctl = bch2_fs_file_ioctl,
#ifdef CONFIG_COMPAT
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index 39f872de..df0f00f1 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "dirent.h"
#include "error.h"
@@ -464,11 +464,11 @@ static int check_extents(struct bch_fs *c)
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_on_stack prev;
+ struct bkey_buf prev;
u64 i_sectors;
int ret = 0;
- bkey_on_stack_init(&prev);
+ bch2_bkey_buf_init(&prev);
prev.k->k = KEY(0, 0, 0);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
@@ -500,7 +500,7 @@ retry:
goto err;
}
}
- bkey_on_stack_reassemble(&prev, c, k);
+ bch2_bkey_buf_reassemble(&prev, c, k);
ret = walk_inode(&trans, &w, k.k->p.inode);
if (ret)
@@ -569,7 +569,7 @@ err:
fsck_err:
if (ret == -EINTR)
goto retry;
- bkey_on_stack_exit(&prev, c);
+ bch2_bkey_buf_exit(&prev, c);
return bch2_trans_exit(&trans) ?: ret;
}
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index abf204ef..4c4ba07c 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -9,7 +9,7 @@
#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "bset.h"
#include "btree_update.h"
#include "buckets.h"
@@ -183,18 +183,23 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
/* Extent update path: */
-static int sum_sector_overwrites(struct btree_trans *trans,
- struct btree_iter *extent_iter,
- struct bkey_i *new,
- bool *maybe_extending,
- s64 *i_sectors_delta,
- s64 *disk_sectors_delta)
+int bch2_sum_sector_overwrites(struct btree_trans *trans,
+ struct btree_iter *extent_iter,
+ struct bkey_i *new,
+ bool *maybe_extending,
+ bool *should_check_enospc,
+ s64 *i_sectors_delta,
+ s64 *disk_sectors_delta)
{
+ struct bch_fs *c = trans->c;
struct btree_iter *iter;
struct bkey_s_c old;
+ unsigned new_replicas = bch2_bkey_replicas(c, bkey_i_to_s_c(new));
+ bool new_compressed = bch2_bkey_sectors_compressed(bkey_i_to_s_c(new));
int ret = 0;
*maybe_extending = true;
+ *should_check_enospc = false;
*i_sectors_delta = 0;
*disk_sectors_delta = 0;
@@ -213,6 +218,11 @@ static int sum_sector_overwrites(struct btree_trans *trans,
(int) (bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(new)) -
bch2_bkey_nr_ptrs_fully_allocated(old));
+ if (!*should_check_enospc &&
+ (new_replicas > bch2_bkey_replicas(c, old) ||
+ (!new_compressed && bch2_bkey_sectors_compressed(old))))
+ *should_check_enospc = true;
+
if (bkey_cmp(old.k->p, new->k.p) >= 0) {
/*
* Check if there's already data above where we're
@@ -250,7 +260,7 @@ int bch2_extent_update(struct btree_trans *trans,
{
/* this must live until after bch2_trans_commit(): */
struct bkey_inode_buf inode_p;
- bool extending = false;
+ bool extending = false, should_check_enospc;
s64 i_sectors_delta = 0, disk_sectors_delta = 0;
int ret;
@@ -258,8 +268,9 @@ int bch2_extent_update(struct btree_trans *trans,
if (ret)
return ret;
- ret = sum_sector_overwrites(trans, iter, k,
+ ret = bch2_sum_sector_overwrites(trans, iter, k,
&extending,
+ &should_check_enospc,
&i_sectors_delta,
&disk_sectors_delta);
if (ret)
@@ -269,7 +280,8 @@ int bch2_extent_update(struct btree_trans *trans,
disk_sectors_delta > (s64) disk_res->sectors) {
ret = bch2_disk_reservation_add(trans->c, disk_res,
disk_sectors_delta - disk_res->sectors,
- 0);
+ !should_check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
return ret;
}
@@ -320,8 +332,7 @@ int bch2_extent_update(struct btree_trans *trans,
ret = bch2_trans_commit(trans, disk_res, journal_seq,
BTREE_INSERT_NOCHECK_RW|
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE);
+ BTREE_INSERT_NOFAIL);
if (ret)
return ret;
@@ -404,14 +415,14 @@ int bch2_fpunch(struct bch_fs *c, u64 inum, u64 start, u64 end,
int bch2_write_index_default(struct bch_write_op *op)
{
struct bch_fs *c = op->c;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
struct keylist *keys = &op->insert_keys;
struct bkey_i *k = bch2_keylist_front(keys);
struct btree_trans trans;
struct btree_iter *iter;
int ret;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -423,7 +434,7 @@ int bch2_write_index_default(struct bch_write_op *op)
k = bch2_keylist_front(keys);
- bkey_on_stack_realloc(&sk, c, k->k.u64s);
+ bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
bkey_copy(sk.k, k);
bch2_cut_front(iter->pos, sk.k);
@@ -440,7 +451,7 @@ int bch2_write_index_default(struct bch_write_op *op)
} while (!bch2_keylist_empty(keys));
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return ret;
}
@@ -1617,14 +1628,14 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio
{
struct btree_trans trans;
struct btree_iter *iter;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
@@ -1636,7 +1647,7 @@ retry:
if (bkey_err(k))
goto err;
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
@@ -1657,7 +1668,7 @@ retry:
out:
bch2_rbio_done(rbio);
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return;
err:
rbio->bio.bi_status = BLK_STS_IOERR;
@@ -1670,14 +1681,14 @@ static void bch2_read_retry(struct bch_fs *c, struct bch_read_bio *rbio,
{
struct btree_trans trans;
struct btree_iter *iter;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
struct bkey_s_c k;
int ret;
flags &= ~BCH_READ_LAST_FRAGMENT;
flags |= BCH_READ_MUST_CLONE;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -1687,7 +1698,7 @@ retry:
BTREE_ITER_SLOTS, k, ret) {
unsigned bytes, sectors, offset_into_extent;
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
offset_into_extent = iter->pos.offset -
bkey_start_offset(k.k);
@@ -1736,7 +1747,7 @@ err:
rbio->bio.bi_status = BLK_STS_IOERR;
out:
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
bch2_rbio_done(rbio);
}
@@ -1807,17 +1818,6 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
if ((ret = bkey_err(k)))
goto out;
- /*
- * going to be temporarily appending another checksum entry:
- */
- new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
- BKEY_EXTENT_U64s_MAX * 8);
- if ((ret = PTR_ERR_OR_ZERO(new)))
- goto out;
-
- bkey_reassemble(new, k);
- k = bkey_i_to_s_c(new);
-
if (bversion_cmp(k.k->version, rbio->version) ||
!bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
goto out;
@@ -1836,6 +1836,16 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
goto out;
}
+ /*
+ * going to be temporarily appending another checksum entry:
+ */
+ new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+ sizeof(struct bch_extent_crc128));
+ if ((ret = PTR_ERR_OR_ZERO(new)))
+ goto out;
+
+ bkey_reassemble(new, k);
+
if (!bch2_bkey_narrow_crcs(new, new_crc))
goto out;
@@ -2002,7 +2012,7 @@ static void bch2_read_endio(struct bio *bio)
int __bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
- struct bkey_on_stack *orig_k)
+ struct bkey_buf *orig_k)
{
struct btree_iter *iter;
struct bkey_s_c k;
@@ -2029,7 +2039,7 @@ int __bch2_read_indirect_extent(struct btree_trans *trans,
}
*offset_into_extent = iter->pos.offset - bkey_start_offset(k.k);
- bkey_on_stack_reassemble(orig_k, trans->c, k);
+ bch2_bkey_buf_reassemble(orig_k, trans->c, k);
err:
bch2_trans_iter_put(trans, iter);
return ret;
@@ -2208,7 +2218,11 @@ get_bio:
bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);
- if (pick.ptr.cached)
+ /*
+ * If it's being moved internally, we don't want to flag it as a cache
+ * hit:
+ */
+ if (pick.ptr.cached && !(flags & BCH_READ_NODECODE))
bch2_bucket_io_time_reset(trans, pick.ptr.dev,
PTR_BUCKET_NR(ca, &pick.ptr), READ);
@@ -2290,7 +2304,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
{
struct btree_trans trans;
struct btree_iter *iter;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
struct bkey_s_c k;
unsigned flags = BCH_READ_RETRY_IF_STALE|
BCH_READ_MAY_PROMOTE|
@@ -2304,7 +2318,7 @@ void bch2_read(struct bch_fs *c, struct bch_read_bio *rbio, u64 inode)
rbio->c = c;
rbio->start_time = local_clock();
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
retry:
bch2_trans_begin(&trans);
@@ -2327,7 +2341,7 @@ retry:
bkey_start_offset(k.k);
sectors = k.k->size - offset_into_extent;
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
ret = bch2_read_indirect_extent(&trans,
&offset_into_extent, &sk);
@@ -2364,7 +2378,7 @@ retry:
}
out:
bch2_trans_exit(&trans);
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return;
err:
if (ret == -EINTR)
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index e6aac594..04f6baa1 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -3,7 +3,7 @@
#define _BCACHEFS_IO_H
#include "checksum.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "io_types.h"
#define to_wbio(_bio) \
@@ -60,6 +60,8 @@ static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
: op->c->wq;
}
+int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
+ struct bkey_i *, bool *, bool *, s64 *, s64 *);
int bch2_extent_update(struct btree_trans *, struct btree_iter *,
struct bkey_i *, struct disk_reservation *,
u64 *, u64, s64 *);
@@ -112,11 +114,11 @@ struct cache_promote_op;
struct extent_ptr_decoded;
int __bch2_read_indirect_extent(struct btree_trans *, unsigned *,
- struct bkey_on_stack *);
+ struct bkey_buf *);
static inline int bch2_read_indirect_extent(struct btree_trans *trans,
unsigned *offset_into_extent,
- struct bkey_on_stack *k)
+ struct bkey_buf *k)
{
return k->k->k.type == KEY_TYPE_reflink_p
? __bch2_read_indirect_extent(trans, offset_into_extent, k)
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index d5442482..69e487bc 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -777,7 +777,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
}
} else {
rcu_read_lock();
- ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC,
+ ob = bch2_bucket_alloc(c, ca, RESERVE_NONE,
false, cl);
rcu_read_unlock();
if (IS_ERR(ob)) {
@@ -1095,7 +1095,7 @@ int bch2_fs_journal_init(struct journal *j)
/* Btree roots: */
j->entry_u64s_reserved +=
- BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_EXTENT_U64s_MAX);
+ BTREE_ID_NR * (JSET_KEYS_U64s + BKEY_BTREE_PTR_U64s_MAX);
atomic64_set(&j->reservations.counter,
((union journal_res_state)
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 0e6fbe2f..2a344a04 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -577,8 +577,15 @@ reread:
if (bch2_dev_io_err_on(ret, ca,
"journal read error: sector %llu",
offset) ||
- bch2_meta_read_fault("journal"))
- return -EIO;
+ bch2_meta_read_fault("journal")) {
+ /*
+ * We don't error out of the recovery process
+ * here, since the relevant journal entry may be
+ * found on a different device, and missing or
+ * no journal entries will be handled later
+ */
+ return 0;
+ }
j = buf->data;
}
@@ -990,6 +997,8 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
done:
rcu_read_unlock();
+ BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
+
return replicas >= c->opts.metadata_replicas_required ? 0 : -EROFS;
}
@@ -1050,9 +1059,13 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
return;
memcpy(new_buf, buf->data, buf->buf_size);
- kvpfree(buf->data, buf->buf_size);
- buf->data = new_buf;
- buf->buf_size = new_size;
+
+ spin_lock(&j->lock);
+ swap(buf->data, new_buf);
+ swap(buf->buf_size, new_size);
+ spin_unlock(&j->lock);
+
+ kvpfree(new_buf, new_size);
}
static inline struct journal_buf *journal_last_unwritten_buf(struct journal *j)
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index 67ee47eb..9953663e 100644
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -20,7 +20,7 @@
struct journal_buf {
struct jset *data;
- BKEY_PADDED(key);
+ __BKEY_PADDED(key, BCH_REPLICAS_MAX);
struct closure_waitlist wait;
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 96c8690a..6241ff0c 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -4,7 +4,7 @@
*/
#include "bcachefs.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "buckets.h"
@@ -41,10 +41,10 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
int ret = 0;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
@@ -57,7 +57,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
continue;
}
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
ret = drop_dev_ptrs(c, bkey_i_to_s(sk.k),
dev_idx, flags, false);
@@ -90,7 +90,7 @@ static int __bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags
}
ret = bch2_trans_exit(&trans) ?: ret;
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
BUG_ON(ret == -EINTR);
@@ -109,6 +109,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
struct btree_iter *iter;
struct closure cl;
struct btree *b;
+ struct bkey_buf k;
unsigned id;
int ret;
@@ -116,28 +117,28 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
if (flags & BCH_FORCE_IF_METADATA_LOST)
return -EINVAL;
+ bch2_bkey_buf_init(&k);
bch2_trans_init(&trans, c, 0, 0);
closure_init_stack(&cl);
for (id = 0; id < BTREE_ID_NR; id++) {
for_each_btree_node(&trans, iter, id, POS_MIN,
BTREE_ITER_PREFETCH, b) {
- __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
retry:
if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key),
dev_idx))
continue;
- bkey_copy(&tmp.k, &b->key);
+ bch2_bkey_buf_copy(&k, c, &b->key);
- ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.k),
+ ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
dev_idx, flags, true);
if (ret) {
bch_err(c, "Cannot drop device without losing data");
goto err;
}
- ret = bch2_btree_node_update_key(c, iter, b, &tmp.k);
+ ret = bch2_btree_node_update_key(c, iter, b, k.k);
if (ret == -EINTR) {
b = bch2_btree_iter_peek_node(iter);
goto retry;
@@ -157,6 +158,7 @@ retry:
ret = 0;
err:
ret = bch2_trans_exit(&trans) ?: ret;
+ bch2_bkey_buf_exit(&k, c);
BUG_ON(ret == -EINTR);
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 6633d21f..9505eab9 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -2,7 +2,7 @@
#include "bcachefs.h"
#include "alloc_foreground.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_gc.h"
#include "btree_update.h"
#include "btree_update_interior.h"
@@ -61,8 +61,13 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct migrate_write *m =
container_of(op, struct migrate_write, op);
struct keylist *keys = &op->insert_keys;
+ struct bkey_buf _new, _insert;
int ret = 0;
+ bch2_bkey_buf_init(&_new);
+ bch2_bkey_buf_init(&_insert);
+ bch2_bkey_buf_realloc(&_insert, c, U8_MAX);
+
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
iter = bch2_trans_get_iter(&trans, m->btree_id,
@@ -73,21 +78,18 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
struct bkey_s_c k;
struct bkey_i *insert;
struct bkey_i_extent *new;
- BKEY_PADDED(k) _new, _insert;
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
bool did_work = false;
- int nr;
+ bool extending = false, should_check_enospc;
+ s64 i_sectors_delta = 0, disk_sectors_delta = 0;
bch2_trans_reset(&trans, 0);
k = bch2_btree_iter_peek_slot(iter);
ret = bkey_err(k);
- if (ret) {
- if (ret == -EINTR)
- continue;
- break;
- }
+ if (ret)
+ goto err;
new = bkey_i_to_extent(bch2_keylist_front(keys));
@@ -95,11 +97,11 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
!bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
goto nomatch;
- bkey_reassemble(&_insert.k, k);
- insert = &_insert.k;
+ bkey_reassemble(_insert.k, k);
+ insert = _insert.k;
- bkey_copy(&_new.k, bch2_keylist_front(keys));
- new = bkey_i_to_extent(&_new.k);
+ bch2_bkey_buf_copy(&_new, c, bch2_keylist_front(keys));
+ new = bkey_i_to_extent(_new.k);
bch2_cut_front(iter->pos, &new->k_i);
bch2_cut_front(iter->pos, insert);
@@ -144,23 +146,21 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
op->opts.background_target,
op->opts.data_replicas);
- /*
- * If we're not fully overwriting @k, and it's compressed, we
- * need a reservation for all the pointers in @insert
- */
- nr = bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(insert)) -
- m->nr_ptrs_reserved;
+ ret = bch2_sum_sector_overwrites(&trans, iter, insert,
+ &extending,
+ &should_check_enospc,
+ &i_sectors_delta,
+ &disk_sectors_delta);
+ if (ret)
+ goto err;
- if (insert->k.size < k.k->size &&
- bch2_bkey_sectors_compressed(k) &&
- nr > 0) {
+ if (disk_sectors_delta > (s64) op->res.sectors) {
ret = bch2_disk_reservation_add(c, &op->res,
- keylist_sectors(keys) * nr, 0);
+ disk_sectors_delta - op->res.sectors,
+ !should_check_enospc
+ ? BCH_DISK_RESERVATION_NOFAIL : 0);
if (ret)
goto out;
-
- m->nr_ptrs_reserved += nr;
- goto next;
}
bch2_trans_update(&trans, iter, insert, 0);
@@ -168,8 +168,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
ret = bch2_trans_commit(&trans, &op->res,
op_journal_seq(op),
BTREE_INSERT_NOFAIL|
- BTREE_INSERT_USE_RESERVE|
m->data_opts.btree_insert_flags);
+err:
if (!ret)
atomic_long_inc(&c->extent_migrate_done);
if (ret == -EINTR)
@@ -197,6 +197,8 @@ nomatch:
}
out:
bch2_trans_exit(&trans);
+ bch2_bkey_buf_exit(&_insert, c);
+ bch2_bkey_buf_exit(&_new, c);
BUG_ON(ret == -EINTR);
return ret;
}
@@ -516,7 +518,7 @@ static int __bch2_move_data(struct bch_fs *c,
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
- struct bkey_on_stack sk;
+ struct bkey_buf sk;
struct btree_trans trans;
struct btree_iter *iter;
struct bkey_s_c k;
@@ -525,7 +527,7 @@ static int __bch2_move_data(struct bch_fs *c,
u64 delay, cur_inum = U64_MAX;
int ret = 0, ret2;
- bkey_on_stack_init(&sk);
+ bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
stats->data_type = BCH_DATA_user;
@@ -605,13 +607,19 @@ peek:
}
/* unlock before doing IO: */
- bkey_on_stack_reassemble(&sk, c, k);
+ bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);
if (ret2) {
+ if (ret2 == -EINTR) {
+ bch2_trans_reset(&trans, 0);
+ bch2_trans_cond_resched(&trans);
+ continue;
+ }
+
if (ret2 == -ENOMEM) {
/* memory allocation failure, wait for some IO to finish */
bch2_move_ctxt_wait_for_io(ctxt);
@@ -633,7 +641,7 @@ next_nondata:
}
out:
ret = bch2_trans_exit(&trans) ?: ret;
- bkey_on_stack_exit(&sk, c);
+ bch2_bkey_buf_exit(&sk, c);
return ret;
}
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index 2c5daed5..efa7f38e 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -200,6 +200,11 @@ static int bch2_copygc(struct bch_fs *c)
return -1;
}
+ /*
+ * Our btree node allocations also come out of RESERVE_MOVINGGC:
+ */
+ sectors_to_move = (sectors_to_move * 3) / 4;
+
for (i = h->data; i < h->data + h->used; i++)
sectors_to_move += i->sectors * i->replicas;
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 1883a1fa..5a43682c 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "bkey_buf.h"
#include "alloc_background.h"
#include "btree_gc.h"
#include "btree_update.h"
@@ -224,28 +225,29 @@ static int bch2_btree_and_journal_walk_recurse(struct bch_fs *c, struct btree *b
if (b->c.level) {
struct btree *child;
- BKEY_PADDED(k) tmp;
+ struct bkey_buf tmp;
- bkey_reassemble(&tmp.k, k);
- k = bkey_i_to_s_c(&tmp.k);
+ bch2_bkey_buf_init(&tmp);
+ bch2_bkey_buf_reassemble(&tmp, c, k);
+ k = bkey_i_to_s_c(tmp.k);
bch2_btree_and_journal_iter_advance(&iter);
- if (b->c.level > 0) {
- child = bch2_btree_node_get_noiter(c, &tmp.k,
- b->c.btree_id, b->c.level - 1);
- ret = PTR_ERR_OR_ZERO(child);
- if (ret)
- break;
+ child = bch2_btree_node_get_noiter(c, tmp.k,
+ b->c.btree_id, b->c.level - 1);
+ bch2_bkey_buf_exit(&tmp, c);
- ret = (node_fn ? node_fn(c, b) : 0) ?:
- bch2_btree_and_journal_walk_recurse(c, child,
- journal_keys, btree_id, node_fn, key_fn);
- six_unlock_read(&child->c.lock);
+ ret = PTR_ERR_OR_ZERO(child);
+ if (ret)
+ break;
- if (ret)
- break;
- }
+ ret = (node_fn ? node_fn(c, b) : 0) ?:
+ bch2_btree_and_journal_walk_recurse(c, child,
+ journal_keys, btree_id, node_fn, key_fn);
+ six_unlock_read(&child->c.lock);
+
+ if (ret)
+ break;
} else {
bch2_btree_and_journal_iter_advance(&iter);
}
@@ -936,7 +938,7 @@ int bch2_fs_recovery(struct bch_fs *c)
struct bch_sb_field_clean *clean = NULL;
struct jset *last_journal_entry = NULL;
u64 blacklist_seq, journal_seq;
- bool write_sb = false, need_write_alloc = false;
+ bool write_sb = false;
int ret;
if (c->sb.clean)
@@ -1082,10 +1084,8 @@ use_clean:
bch_info(c, "starting metadata mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, true);
- if (ret < 0)
- goto err;
if (ret)
- need_write_alloc = true;
+ goto err;
bch_verbose(c, "mark and sweep done");
}
@@ -1095,10 +1095,8 @@ use_clean:
bch_info(c, "starting mark and sweep");
err = "error in mark and sweep";
ret = bch2_gc(c, &c->journal_keys, true, false);
- if (ret < 0)
- goto err;
if (ret)
- need_write_alloc = true;
+ goto err;
bch_verbose(c, "mark and sweep done");
}
@@ -1122,7 +1120,8 @@ use_clean:
goto err;
bch_verbose(c, "journal replay done");
- if (need_write_alloc && !c->opts.nochanges) {
+ if (test_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags) &&
+ !c->opts.nochanges) {
/*
* note that even when filesystem was clean there might be work
* to do here, if we ran gc (because of fsck) which recalculated
@@ -1137,8 +1136,6 @@ use_clean:
goto err;
}
bch_verbose(c, "alloc write done");
-
- set_bit(BCH_FS_ALLOC_WRITTEN, &c->flags);
}
if (!c->sb.clean) {
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index 8abcbfb3..930547de 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
-#include "bkey_on_stack.h"
+#include "bkey_buf.h"
#include "btree_update.h"
#include "extents.h"
#include "inode.h"
@@ -198,8 +198,7 @@ s64 bch2_remap_range(struct bch_fs *c,
struct btree_trans trans;
struct btree_iter *dst_iter, *src_iter;
struct bkey_s_c src_k;
- BKEY_PADDED(k) new_dst;
- struct bkey_on_stack new_src;
+ struct bkey_buf new_dst, new_src;
struct bpos dst_end = dst_start, src_end = src_start;
struct bpos dst_want, src_want;
u64 src_done, dst_done;
@@ -216,7 +215,8 @@ s64 bch2_remap_range(struct bch_fs *c,
dst_end.offset += remap_sectors;
src_end.offset += remap_sectors;
- bkey_on_stack_init(&new_src);
+ bch2_bkey_buf_init(&new_dst);
+ bch2_bkey_buf_init(&new_src);
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
src_iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS, src_start,
@@ -257,7 +257,7 @@ s64 bch2_remap_range(struct bch_fs *c,
break;
if (src_k.k->type != KEY_TYPE_reflink_p) {
- bkey_on_stack_reassemble(&new_src, c, src_k);
+ bch2_bkey_buf_reassemble(&new_src, c, src_k);
src_k = bkey_i_to_s_c(new_src.k);
bch2_cut_front(src_iter->pos, new_src.k);
@@ -275,7 +275,7 @@ s64 bch2_remap_range(struct bch_fs *c,
struct bkey_s_c_reflink_p src_p =
bkey_s_c_to_reflink_p(src_k);
struct bkey_i_reflink_p *dst_p =
- bkey_reflink_p_init(&new_dst.k);
+ bkey_reflink_p_init(new_dst.k);
u64 offset = le64_to_cpu(src_p.v->idx) +
(src_iter->pos.offset -
@@ -286,12 +286,12 @@ s64 bch2_remap_range(struct bch_fs *c,
BUG();
}
- new_dst.k.k.p = dst_iter->pos;
- bch2_key_resize(&new_dst.k.k,
+ new_dst.k->k.p = dst_iter->pos;
+ bch2_key_resize(&new_dst.k->k,
min(src_k.k->p.offset - src_iter->pos.offset,
dst_end.offset - dst_iter->pos.offset));
- ret = bch2_extent_update(&trans, dst_iter, &new_dst.k,
+ ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
NULL, journal_seq,
new_i_size, i_sectors_delta);
if (ret)
@@ -333,7 +333,8 @@ err:
} while (ret2 == -EINTR);
ret = bch2_trans_exit(&trans) ?: ret;
- bkey_on_stack_exit(&new_src, c);
+ bch2_bkey_buf_exit(&new_src, c);
+ bch2_bkey_buf_exit(&new_dst, c);
percpu_ref_put(&c->writes);
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index cc13fc25..bfae0d71 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -798,7 +798,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
pr_buf(out,
"free_inc: %zu/%zu\n"
- "free[RESERVE_BTREE]: %zu/%zu\n"
"free[RESERVE_MOVINGGC]: %zu/%zu\n"
"free[RESERVE_NONE]: %zu/%zu\n"
"buckets:\n"
@@ -827,7 +826,6 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
"open_buckets_user: %u\n"
"btree reserve cache: %u\n",
fifo_used(&ca->free_inc), ca->free_inc.size,
- fifo_used(&ca->free[RESERVE_BTREE]), ca->free[RESERVE_BTREE].size,
fifo_used(&ca->free[RESERVE_MOVINGGC]), ca->free[RESERVE_MOVINGGC].size,
fifo_used(&ca->free[RESERVE_NONE]), ca->free[RESERVE_NONE].size,
ca->mi.nbuckets - ca->mi.first_bucket,