author    Kent Overstreet <kent.overstreet@linux.dev>  2022-10-09 23:27:41 -0400
committer Kent Overstreet <kent.overstreet@linux.dev>  2022-10-10 00:10:45 -0400
commit    72add8822c47e5801d4ac6d42af8c5d9d7b4d3c9 (patch)
tree      9e4c08aa14105b9d3c88116291b326352aec6065
parent    8d6138baac3b4fcd715c34cf325ae11b01a4ca67 (diff)
Update bcachefs sources to 47ffed9fad bcachefs: bch2_btree_delete_range_trans() now uses peek_upto()
-rw-r--r--  .bcachefs_revision                   |   2
-rw-r--r--  include/linux/mm.h                   |  25
-rw-r--r--  include/linux/rwsem.h                |   1
-rw-r--r--  include/linux/sched.h                |  12
-rw-r--r--  libbcachefs/backpointers.c           | 155
-rw-r--r--  libbcachefs/btree_io.c               |  15
-rw-r--r--  libbcachefs/btree_iter.c             |  21
-rw-r--r--  libbcachefs/btree_locking.c          |  43
-rw-r--r--  libbcachefs/btree_update.h           |   4
-rw-r--r--  libbcachefs/btree_update_interior.c  | 204
-rw-r--r--  libbcachefs/btree_update_interior.h  |   1
-rw-r--r--  libbcachefs/btree_update_leaf.c      |  19
-rw-r--r--  libbcachefs/data_update.c            |  23
-rw-r--r--  libbcachefs/data_update.h            |   2
-rw-r--r--  libbcachefs/debug.c                  |  22
-rw-r--r--  libbcachefs/ec.c                     |  26
-rw-r--r--  libbcachefs/errcode.h                |   1
-rw-r--r--  libbcachefs/fs-io.c                  |   3
-rw-r--r--  libbcachefs/move.c                   |  67
-rw-r--r--  libbcachefs/super.c                  |   8
-rw-r--r--  libbcachefs/util.c                   |  20
-rw-r--r--  libbcachefs/util.h                   |   1
-rw-r--r--  linux/kthread.c                      |   2
-rw-r--r--  linux/shrinker.c                     |  22
24 files changed, 518 insertions(+), 181 deletions(-)
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 83d5a7db..1c9c4ec1 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-cbccc6d8692fdd3af7d5db97a065af5a47bc733c
+47ffed9fad891300a610191602a10ecd1e857cce
diff --git a/include/linux/mm.h b/include/linux/mm.h
new file mode 100644
index 00000000..4bf80ba3
--- /dev/null
+++ b/include/linux/mm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MM_H
+#define _TOOLS_LINUX_MM_H
+
+#include <linux/types.h>
+
+struct sysinfo {
+ long uptime; /* Seconds since boot */
+ unsigned long loads[3]; /* 1, 5, and 15 minute load averages */
+ unsigned long totalram; /* Total usable main memory size */
+ unsigned long freeram; /* Available memory size */
+ unsigned long sharedram; /* Amount of shared memory */
+ unsigned long bufferram; /* Memory used by buffers */
+ unsigned long totalswap; /* Total swap space size */
+ unsigned long freeswap; /* swap space still available */
+ __u16 procs; /* Number of current processes */
+ __u16 pad; /* Explicit padding for m68k */
+ unsigned long totalhigh; /* Total high memory size */
+ unsigned long freehigh; /* Available high memory size */
+ __u32 mem_unit; /* Memory unit size in bytes */
+};
+
+extern void si_meminfo(struct sysinfo * val);
+
+#endif /* _TOOLS_LINUX_MM_H */
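This shim mirrors the kernel's struct sysinfo so code shared with the kernel can size itself against available RAM; in this tree it is backed by the /proc/meminfo parser added to linux/shrinker.c later in this patch. A minimal usage sketch, assuming the tools include path and a hypothetical 256 KiB node size (the real caller divides by btree_bytes(c)):

#include <stdio.h>
#include <linux/mm.h>

int main(void)
{
	struct sysinfo si;
	/* Hypothetical per-object size, for illustration only: */
	unsigned long node_bytes = 256 * 1024;

	si_meminfo(&si);

	/* Same budget rule bch2_get_btree_in_memory_pos() applies below:
	 * spend at most half of RAM on pinned btree nodes. */
	printf("totalram=%lu budget=%lu nodes\n",
	       si.totalram, (si.totalram >> 1) / node_bytes);
	return 0;
}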
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 9d70e6e2..f851d6a2 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -19,6 +19,7 @@ static inline void init_rwsem(struct rw_semaphore *lock)
}
#define down_read(l) pthread_rwlock_rdlock(&(l)->lock)
+#define down_read_killable(l) (pthread_rwlock_rdlock(&(l)->lock), 0)
#define down_read_trylock(l) (!pthread_rwlock_tryrdlock(&(l)->lock))
#define up_read(l) pthread_rwlock_unlock(&(l)->lock)
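down_read_killable() maps to a plain pthread_rwlock_rdlock(): POSIX has no killable acquire, so the comma expression discards the lock call's result and always yields 0. Callers written against the kernel API keep their error check; it just never fires in userspace. A sketch, assuming the pthread-backed rw_semaphore above:

#include <linux/rwsem.h>

static struct rw_semaphore sem;

void example(void)
{
	init_rwsem(&sem);

	if (down_read_killable(&sem))	/* always 0 in this shim */
		return;			/* unreachable in userspace */
	up_read(&sem);
}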
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 48d20e29..ac6d27bb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -7,6 +7,7 @@
#include <linux/bug.h>
#include <linux/completion.h>
#include <linux/jiffies.h>
+#include <linux/rwsem.h>
#include <linux/time64.h>
#define TASK_RUNNING 0
@@ -88,6 +89,10 @@ struct task_struct {
pid_t pid;
struct bio_list *bio_list;
+
+ struct signal_struct {
+ struct rw_semaphore exec_update_lock;
+ } *signal, _signal;
};
extern __thread struct task_struct *current;
@@ -157,4 +162,11 @@ static inline void ktime_get_coarse_real_ts64(struct timespec64 *ts)
#define current_kernel_time64() current_kernel_time()
#define CURRENT_TIME (current_kernel_time())
+static inline unsigned int stack_trace_save_tsk(struct task_struct *task,
+ unsigned long *store, unsigned int size,
+ unsigned int skipnr)
+{
+ return 0;
+}
+
#endif /* __TOOLS_LINUX_SCHED_H */
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 7e8b1301..ee7e610f 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -6,6 +6,8 @@
#include "btree_update.h"
#include "error.h"
+#include <linux/mm.h>
+
#define MAX_EXTENT_COMPRESS_RATIO_SHIFT 10
/*
@@ -802,6 +804,103 @@ err:
return ret;
}
+struct bbpos {
+ enum btree_id btree;
+ struct bpos pos;
+};
+
+static inline int bbpos_cmp(struct bbpos l, struct bbpos r)
+{
+ return cmp_int(l.btree, r.btree) ?: bpos_cmp(l.pos, r.pos);
+}
+
+static inline struct bbpos bbpos_successor(struct bbpos pos)
+{
+ if (bpos_cmp(pos.pos, SPOS_MAX)) {
+ pos.pos = bpos_successor(pos.pos);
+ return pos;
+ }
+
+ if (pos.btree != BTREE_ID_NR) {
+ pos.btree++;
+ pos.pos = POS_MIN;
+ return pos;
+ }
+
+ BUG();
+}
+
+#if 0
+static void bbpos_to_text(struct printbuf *out, struct bbpos pos)
+{
+ prt_str(out, bch2_btree_ids[pos.btree]);
+ prt_char(out, ':');
+ bch2_bpos_to_text(out, pos.pos);
+}
+#endif
+
+static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
+{
+ return (struct bbpos) {
+ .btree = bp.btree_id,
+ .pos = bp.pos,
+ };
+}
+
+int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
+ unsigned btree_leaf_mask,
+ unsigned btree_interior_mask,
+ struct bbpos start, struct bbpos *end)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ struct sysinfo i;
+ size_t btree_nodes;
+ enum btree_id btree;
+ int ret = 0;
+
+ si_meminfo(&i);
+
+ btree_nodes = (i.totalram >> 1) / btree_bytes(trans->c);
+
+ for (btree = start.btree; btree < BTREE_ID_NR && !ret; btree++) {
+ unsigned depth = ((1U << btree) & btree_leaf_mask) ? 1 : 2;
+
+ if (!((1U << btree) & btree_leaf_mask) &&
+ !((1U << btree) & btree_interior_mask))
+ continue;
+
+ bch2_trans_node_iter_init(trans, &iter, btree,
+ btree == start.btree ? start.pos : POS_MIN,
+ 0, depth, 0);
+ /*
+	 * for_each_btree_key_continue() doesn't check the return value
+ * from bch2_btree_iter_advance(), which is needed when
+ * iterating over interior nodes where we'll see keys at
+ * SPOS_MAX:
+ */
+ do {
+ k = __bch2_btree_iter_peek_and_restart(trans, &iter, 0);
+ ret = bkey_err(k);
+ if (!k.k || ret)
+ break;
+
+ --btree_nodes;
+ if (!btree_nodes) {
+ end->btree = btree;
+ end->pos = k.k->p;
+ bch2_trans_iter_exit(trans, &iter);
+ return 0;
+ }
+ } while (bch2_btree_iter_advance(&iter));
+ bch2_trans_iter_exit(trans, &iter);
+ }
+
+ end->btree = BTREE_ID_NR;
+ end->pos = POS_MIN;
+ return ret;
+}
+
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
struct btree_trans trans;
@@ -845,19 +944,26 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
static int check_one_backpointer(struct btree_trans *trans,
struct bpos bucket,
- u64 *bp_offset)
+ u64 *bp_offset,
+ struct bbpos start,
+ struct bbpos end)
{
struct btree_iter iter;
struct bch_backpointer bp;
+ struct bbpos pos;
struct bkey_s_c k;
struct printbuf buf = PRINTBUF;
int ret;
- ret = bch2_get_next_backpointer(trans, bucket, -1,
- bp_offset, &bp);
+ ret = bch2_get_next_backpointer(trans, bucket, -1, bp_offset, &bp);
if (ret || *bp_offset == U64_MAX)
return ret;
+ pos = bp_to_bbpos(bp);
+ if (bbpos_cmp(pos, start) < 0 ||
+ bbpos_cmp(pos, end) > 0)
+ return 0;
+
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
ret = bkey_err(k);
if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
@@ -880,29 +986,52 @@ fsck_err:
return ret;
}
-int bch2_check_backpointers_to_extents(struct bch_fs *c)
+static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
+ struct bbpos start,
+ struct bbpos end)
{
- struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
int ret = 0;
- bch2_trans_init(&trans, c, 0, 0);
- for_each_btree_key(&trans, iter, BTREE_ID_alloc, POS_MIN,
+ for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
BTREE_ITER_PREFETCH, k, ret) {
u64 bp_offset = 0;
- while (!(ret = commit_do(&trans, NULL, NULL,
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_NOFAIL,
- check_one_backpointer(&trans, iter.pos, &bp_offset))) &&
+ while (!(ret = commit_do(trans, NULL, NULL,
+ BTREE_INSERT_LAZY_RW|
+ BTREE_INSERT_NOFAIL,
+ check_one_backpointer(trans, iter.pos, &bp_offset, start, end))) &&
bp_offset < U64_MAX)
bp_offset++;
if (ret)
break;
}
- bch2_trans_iter_exit(&trans, &iter);
- bch2_trans_exit(&trans);
+ bch2_trans_iter_exit(trans, &iter);
return ret < 0 ? ret : 0;
}
+
+int bch2_check_backpointers_to_extents(struct bch_fs *c)
+{
+ struct btree_trans trans;
+ struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
+ int ret;
+
+ bch2_trans_init(&trans, c, 0, 0);
+ while (1) {
+ ret = bch2_get_btree_in_memory_pos(&trans,
+ (1U << BTREE_ID_extents)|
+ (1U << BTREE_ID_reflink),
+ ~0,
+ start, &end) ?:
+ bch2_check_backpointers_to_extents_pass(&trans, start, end);
+ if (ret || end.btree == BTREE_ID_NR)
+ break;
+
+ start = bbpos_successor(end);
+ }
+ bch2_trans_exit(&trans);
+
+ return ret;
+}
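bch2_check_backpointers_to_extents() now runs in memory-sized passes: each pass walks the extents/reflink btrees until roughly half of RAM worth of btree nodes has been visited, records where it stopped in *end, and the next pass resumes from bbpos_successor(end). The node budget is simple arithmetic; a worked sketch with hypothetical numbers:

#include <stdio.h>

int main(void)
{
	/* Hypothetical machine: 16 GiB of RAM, 256 KiB btree nodes. */
	unsigned long long totalram   = 16ULL << 30;
	unsigned long long node_bytes = 256ULL << 10;

	/* (i.totalram >> 1) / btree_bytes(trans->c) from the hunk above: */
	unsigned long long btree_nodes = (totalram >> 1) / node_bytes;

	printf("%llu nodes per pass\n", btree_nodes);	/* 32768 */
	return 0;
}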
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 13ce2975..dd6b536c 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1913,6 +1913,8 @@ do_write:
u64s = bch2_sort_keys(i->start, &sort_iter, false);
le16_add_cpu(&i->u64s, u64s);
+ BUG_ON(!b->written && i->u64s != b->data->keys.u64s);
+
set_needs_whiteout(i, false);
/* do we have data to write? */
@@ -1922,6 +1924,10 @@ do_write:
bytes_to_write = vstruct_end(i) - data;
sectors_to_write = round_up(bytes_to_write, block_bytes(c)) >> 9;
+ if (!b->written &&
+ b->key.k.type == KEY_TYPE_btree_ptr_v2)
+ BUG_ON(btree_ptr_sectors_written(&b->key) != sectors_to_write);
+
memset(data + bytes_to_write, 0,
(sectors_to_write << 9) - bytes_to_write);
@@ -2010,11 +2016,6 @@ do_write:
b->written += sectors_to_write;
- if (wbio->wbio.first_btree_write &&
- b->key.k.type == KEY_TYPE_btree_ptr_v2)
- bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
- cpu_to_le16(b->written);
-
if (wbio->key.k.type == KEY_TYPE_btree_ptr_v2)
bkey_i_to_btree_ptr_v2(&wbio->key)->v.sectors_written =
cpu_to_le16(b->written);
@@ -2027,10 +2028,6 @@ do_write:
return;
err:
set_btree_node_noevict(b);
- if (!b->written &&
- b->key.k.type == KEY_TYPE_btree_ptr_v2)
- bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
- cpu_to_le16(sectors_to_write);
b->written += sectors_to_write;
nowrite:
btree_bounce_free(c, bytes, used_mempool, data);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 925ffb31..dffb0170 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1850,10 +1850,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
struct bkey_s_c k, k2;
int ret;
- EBUG_ON(iter->path->cached || iter->path->level);
+ EBUG_ON(iter->path->cached);
bch2_btree_iter_verify(iter);
while (1) {
+ struct btree_path_level *l;
+
iter->path = bch2_btree_path_set_pos(trans, iter->path, search_key,
iter->flags & BTREE_ITER_INTENT,
btree_iter_ip_allocated(iter));
@@ -1866,9 +1868,18 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
goto out;
}
+ l = path_l(iter->path);
+
+ if (unlikely(!l->b)) {
+ /* No btree nodes at requested level: */
+ bch2_btree_iter_set_pos(iter, SPOS_MAX);
+ k = bkey_s_c_null;
+ goto out;
+ }
+
btree_path_set_should_be_locked(iter->path);
- k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
+ k = btree_path_level_peek_all(trans->c, l, &iter->k);
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
k.k &&
@@ -1889,7 +1900,7 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
: NULL;
if (next_update &&
bpos_cmp(next_update->k.p,
- k.k ? k.k->p : iter->path->l[0].b->key.k.p) <= 0) {
+ k.k ? k.k->p : l->b->key.k.p) <= 0) {
iter->k = next_update->k;
k = bkey_i_to_s_c(next_update);
}
@@ -1910,9 +1921,9 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
if (likely(k.k)) {
break;
- } else if (likely(bpos_cmp(iter->path->l[0].b->key.k.p, SPOS_MAX))) {
+ } else if (likely(bpos_cmp(l->b->key.k.p, SPOS_MAX))) {
/* Advance to next leaf node: */
- search_key = bpos_successor(iter->path->l[0].b->key.k.p);
+ search_key = bpos_successor(l->b->key.k.p);
} else {
/* End of btree: */
bch2_btree_iter_set_pos(iter, SPOS_MAX);
diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c
index f4340086..9a525d34 100644
--- a/libbcachefs/btree_locking.c
+++ b/libbcachefs/btree_locking.c
@@ -96,25 +96,26 @@ static noinline void print_chain(struct printbuf *out, struct lock_graph *g)
static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
{
- int ret;
-
if (i == g->g) {
trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
- ret = btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
+ return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
} else {
i->trans->lock_must_abort = true;
- ret = 0;
- }
-
- for (i = g->g + 1; i < g->g + g->nr; i++)
wake_up_process(i->trans->locking_wait.task);
- return ret;
+ return 0;
+ }
}
static noinline int break_cycle(struct lock_graph *g)
{
struct trans_waiting_for_lock *i;
+ /*
+ * We'd like to prioritize aborting transactions that have done less
+ * work - but it appears breaking cycles by telling other transactions
+ * to abort may still be buggy:
+ */
+#if 0
for (i = g->g; i < g->g + g->nr; i++) {
if (i->trans->lock_may_not_fail ||
i->trans->locking_wait.lock_want == SIX_LOCK_write)
@@ -130,7 +131,7 @@ static noinline int break_cycle(struct lock_graph *g)
return abort_lock(g, i);
}
-
+#endif
for (i = g->g; i < g->g + g->nr; i++) {
if (i->trans->lock_may_not_fail)
continue;
@@ -138,7 +139,29 @@ static noinline int break_cycle(struct lock_graph *g)
return abort_lock(g, i);
}
- BUG();
+ {
+ struct bch_fs *c = g->g->trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ bch_err(c, "cycle of nofail locks");
+
+ for (i = g->g; i < g->g + g->nr; i++) {
+ struct btree_trans *trans = i->trans;
+
+ bch2_btree_trans_to_text(&buf, trans);
+
+ prt_printf(&buf, "backtrace:");
+ prt_newline(&buf);
+ printbuf_indent_add(&buf, 2);
+ bch2_prt_backtrace(&buf, trans->locking_wait.task);
+ printbuf_indent_sub(&buf, 2);
+ prt_newline(&buf);
+ }
+
+ bch2_print_string_as_lines(KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ BUG();
+ }
}
static void lock_graph_pop(struct lock_graph *g)
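With the least-work victim policy disabled under #if 0, break_cycle() aborts the first transaction in the cycle that is allowed to fail, and only BUG()s, after dumping every transaction and its backtrace, when the entire cycle holds nofail locks. A toy model of that fallback, using hypothetical types rather than the real lock_graph:

/* Toy model: each entry waits on the next; aborting any single entry
 * that may fail breaks the cycle. Hypothetical, not bcachefs types. */
struct waiter { int lock_may_not_fail; };

static int pick_victim(const struct waiter *g, unsigned nr)
{
	for (unsigned i = 0; i < nr; i++)
		if (!g[i].lock_may_not_fail)
			return (int) i;	/* abort this one, wake the rest */
	return -1;	/* all nofail: the real code reports and BUG()s */
}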
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 89941fb8..1c2e7b2b 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -8,8 +8,8 @@
struct bch_fs;
struct btree;
-void bch2_btree_node_lock_for_insert(struct btree_trans *, struct btree_path *,
- struct btree *);
+void bch2_btree_node_prep_for_write(struct btree_trans *,
+ struct btree_path *, struct btree *);
bool bch2_btree_bset_insert_key(struct btree_trans *, struct btree_path *,
struct btree *, struct btree_node_iter *,
struct bkey_i *);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 578ba747..b9661407 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -23,9 +23,9 @@
#include <linux/random.h>
#include <trace/events/bcachefs.h>
-static void bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
- struct btree_path *, struct btree *,
- struct keylist *, unsigned);
+static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
+ struct btree_path *, struct btree *,
+ struct keylist *, unsigned);
static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
@@ -37,8 +37,8 @@ static struct btree_path *get_unlocked_mut_path(struct btree_trans *trans,
path = bch2_path_get(trans, btree_id, pos, level + 1, level,
BTREE_ITER_NOPRESERVE|
- BTREE_ITER_INTENT, _THIS_IP_);
- path = bch2_btree_path_make_mut(trans, path, true, _THIS_IP_);
+ BTREE_ITER_INTENT, _RET_IP_);
+ path = bch2_btree_path_make_mut(trans, path, true, _RET_IP_);
bch2_btree_path_downgrade(trans, path);
__bch2_btree_path_unlock(trans, path);
return path;
@@ -195,6 +195,43 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
}
}
+static void bch2_btree_node_free_never_used(struct btree_update *as,
+ struct btree_trans *trans,
+ struct btree *b)
+{
+ struct bch_fs *c = as->c;
+ struct prealloc_nodes *p = &as->prealloc_nodes[b->c.lock.readers != NULL];
+ struct btree_path *path;
+ unsigned level = b->c.level;
+
+ BUG_ON(!list_empty(&b->write_blocked));
+ BUG_ON(b->will_make_reachable != (1UL|(unsigned long) as));
+
+ b->will_make_reachable = 0;
+ closure_put(&as->cl);
+
+ clear_btree_node_will_make_reachable(b);
+ clear_btree_node_accessed(b);
+ clear_btree_node_dirty_acct(c, b);
+ clear_btree_node_need_write(b);
+
+ mutex_lock(&c->btree_cache.lock);
+ list_del_init(&b->list);
+ bch2_btree_node_hash_remove(&c->btree_cache, b);
+ mutex_unlock(&c->btree_cache.lock);
+
+ BUG_ON(p->nr >= ARRAY_SIZE(p->b));
+ p->b[p->nr++] = b;
+
+ six_unlock_intent(&b->c.lock);
+
+ trans_for_each_path(trans, path)
+ if (path->l[level].b == b) {
+ btree_node_unlock(trans, path, level);
+ path->l[level].b = ERR_PTR(-BCH_ERR_no_btree_node_init);
+ }
+}
+
static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
@@ -392,8 +429,6 @@ static struct btree *__btree_root_alloc(struct btree_update *as,
btree_node_set_format(b, b->data->format);
bch2_btree_build_aux_trees(b);
-
- bch2_btree_update_add_new_node(as, b);
six_unlock_write(&b->c.lock);
return b;
@@ -859,6 +894,14 @@ static void bch2_btree_update_add_new_node(struct btree_update *as, struct btree
mutex_unlock(&c->btree_interior_update_lock);
btree_update_add_key(as, &as->new_keys, b);
+
+ if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
+ unsigned bytes = vstruct_end(&b->data->keys) - (void *) b->data;
+ unsigned sectors = round_up(bytes, block_bytes(c)) >> 9;
+
+ bkey_i_to_btree_ptr_v2(&b->key)->v.sectors_written =
+ cpu_to_le16(sectors);
+ }
}
/*
@@ -1026,23 +1069,23 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
nr_nodes[!!update_level] += 1 + split;
update_level++;
- if (!btree_path_node(path, update_level))
- break;
+ ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+ if (ret)
+ return ERR_PTR(ret);
- /*
- * XXX: figure out how far we might need to split,
- * instead of locking/reserving all the way to the root:
- */
- split = update_level + 1 < BTREE_MAX_DEPTH;
- }
+ if (!btree_path_node(path, update_level)) {
+ /* Allocating new root? */
+ nr_nodes[1] += split;
+ update_level = BTREE_MAX_DEPTH;
+ break;
+ }
- /* Might have to allocate a new root: */
- if (update_level < BTREE_MAX_DEPTH)
- nr_nodes[1] += 1;
+ if (bch2_btree_node_insert_fits(c, path->l[update_level].b,
+ BKEY_BTREE_PTR_U64s_MAX * (1 + split)))
+ break;
- ret = bch2_btree_path_upgrade(trans, path, U8_MAX);
- if (ret)
- return ERR_PTR(ret);
+ split = true;
+ }
if (flags & BTREE_INSERT_GC_LOCK_HELD)
lockdep_assert_held(&c->gc_lock);
@@ -1064,6 +1107,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
as->mode = BTREE_INTERIOR_NO_UPDATE;
as->took_gc_lock = !(flags & BTREE_INSERT_GC_LOCK_HELD);
as->btree_id = path->btree_id;
+ as->update_level = update_level;
INIT_LIST_HEAD(&as->list);
INIT_LIST_HEAD(&as->unwritten_list);
INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1191,7 +1235,6 @@ static void bch2_btree_set_root(struct btree_update *as,
struct btree *old;
trace_and_count(c, btree_node_set_root, c, b);
- BUG_ON(!b->written);
old = btree_node_root(c, b);
@@ -1315,8 +1358,6 @@ static struct btree *__btree_split_node(struct btree_update *as,
SET_BTREE_NODE_SEQ(n2->data, BTREE_NODE_SEQ(n1->data));
n2->key.k.p = n1->key.k.p;
- bch2_btree_update_add_new_node(as, n2);
-
set1 = btree_bset_first(n1);
set2 = btree_bset_first(n2);
@@ -1458,18 +1499,19 @@ static void btree_split_insert_keys(struct btree_update *as,
btree_node_interior_verify(as->c, b);
}
-static void btree_split(struct btree_update *as, struct btree_trans *trans,
- struct btree_path *path, struct btree *b,
- struct keylist *keys, unsigned flags)
+static int btree_split(struct btree_update *as, struct btree_trans *trans,
+ struct btree_path *path, struct btree *b,
+ struct keylist *keys, unsigned flags)
{
struct bch_fs *c = as->c;
struct btree *parent = btree_node_parent(path, b);
struct btree *n1, *n2 = NULL, *n3 = NULL;
struct btree_path *path1 = NULL, *path2 = NULL;
u64 start_time = local_clock();
+ int ret = 0;
BUG_ON(!parent && (b != btree_node_root(c, b)));
- BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+ BUG_ON(parent && !btree_node_intent_locked(path, b->c.level + 1));
bch2_btree_interior_update_will_free_node(as, b);
@@ -1499,9 +1541,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_path_level_init(trans, path2, n2);
bch2_btree_update_add_new_node(as, n1);
-
- bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
- bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+ bch2_btree_update_add_new_node(as, n2);
/*
* Note that on recursive parent_keys == keys, so we
@@ -1524,9 +1564,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
n3->sib_u64s[0] = U16_MAX;
n3->sib_u64s[1] = U16_MAX;
- btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
+ bch2_btree_update_add_new_node(as, n3);
- bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+ btree_split_insert_keys(as, trans, path, n3, &as->parent_keys);
}
} else {
trace_and_count(c, btree_node_compact, c, b);
@@ -1541,8 +1581,6 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_update_add_new_node(as, n1);
- bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
-
if (parent)
bch2_keylist_add(&as->parent_keys, &n1->key);
}
@@ -1551,7 +1589,9 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (parent) {
/* Split a non root node */
- bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ if (ret)
+ goto err;
} else if (n3) {
bch2_btree_set_root(as, trans, path, n3);
} else {
@@ -1559,11 +1599,16 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
bch2_btree_set_root(as, trans, path, n1);
}
- bch2_btree_update_get_open_buckets(as, n1);
- if (n2)
- bch2_btree_update_get_open_buckets(as, n2);
- if (n3)
+ if (n3) {
bch2_btree_update_get_open_buckets(as, n3);
+ bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
+ }
+ if (n2) {
+ bch2_btree_update_get_open_buckets(as, n2);
+ bch2_btree_node_write(c, n2, SIX_LOCK_intent, 0);
+ }
+ bch2_btree_update_get_open_buckets(as, n1);
+ bch2_btree_node_write(c, n1, SIX_LOCK_intent, 0);
/*
* The old node must be freed (in memory) _before_ unlocking the new
@@ -1584,7 +1629,7 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
if (n2)
six_unlock_intent(&n2->c.lock);
six_unlock_intent(&n1->c.lock);
-
+out:
if (path2) {
__bch2_btree_path_unlock(trans, path2);
bch2_path_put(trans, path2, true);
@@ -1600,6 +1645,14 @@ static void btree_split(struct btree_update *as, struct btree_trans *trans,
? BCH_TIME_btree_node_split
: BCH_TIME_btree_node_compact],
start_time);
+ return ret;
+err:
+ if (n3)
+ bch2_btree_node_free_never_used(as, trans, n3);
+ if (n2)
+ bch2_btree_node_free_never_used(as, trans, n2);
+ bch2_btree_node_free_never_used(as, trans, n1);
+ goto out;
}
static void
@@ -1634,22 +1687,30 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
* If a split occurred, this function will return early. This can only happen
* for leaf nodes -- inserts into interior nodes have to be atomic.
*/
-static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
- struct btree_path *path, struct btree *b,
- struct keylist *keys, unsigned flags)
+static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *trans,
+ struct btree_path *path, struct btree *b,
+ struct keylist *keys, unsigned flags)
{
struct bch_fs *c = as->c;
int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
+ int ret;
lockdep_assert_held(&c->gc_lock);
- BUG_ON(!btree_node_intent_locked(path, btree_node_root(c, b)->c.level));
+ BUG_ON(!btree_node_intent_locked(path, b->c.level));
BUG_ON(!b->c.level);
BUG_ON(!as || as->b);
bch2_verify_keylist_sorted(keys);
- bch2_btree_node_lock_for_insert(trans, path, b);
+ if (!(local_clock() & 63))
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+ ret = bch2_btree_node_lock_write(trans, path, &b->c);
+ if (ret)
+ return ret;
+
+ bch2_btree_node_prep_for_write(trans, path, b);
if (!bch2_btree_node_insert_fits(c, b, bch2_keylist_u64s(keys))) {
bch2_btree_node_unlock_write(trans, path, b);
@@ -1675,9 +1736,16 @@ static void bch2_btree_insert_node(struct btree_update *as, struct btree_trans *
bch2_btree_node_unlock_write(trans, path, b);
btree_node_interior_verify(c, b);
- return;
+ return 0;
split:
- btree_split(as, trans, path, b, keys, flags);
+ /*
+ * We could attempt to avoid the transaction restart, by calling
+ * bch2_btree_path_upgrade() and allocating more nodes:
+ */
+ if (b->c.level >= as->update_level)
+ return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
+
+ return btree_split(as, trans, path, b, keys, flags);
}
int bch2_btree_split_leaf(struct btree_trans *trans,
@@ -1694,10 +1762,15 @@ int bch2_btree_split_leaf(struct btree_trans *trans,
if (IS_ERR(as))
return PTR_ERR(as);
- btree_split(as, trans, path, b, NULL, flags);
+ ret = btree_split(as, trans, path, b, NULL, flags);
+ if (ret) {
+ bch2_btree_update_free(as, trans);
+ return ret;
+ }
+
bch2_btree_update_done(as, trans);
- for (l = path->level + 1; btree_path_node(path, l) && !ret; l++)
+ for (l = path->level + 1; btree_node_intent_locked(path, l) && !ret; l++)
ret = bch2_foreground_maybe_merge(trans, path, l, flags);
return ret;
@@ -1823,8 +1896,6 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
btree_set_min(n, prev->data->min_key);
btree_set_max(n, next->data->max_key);
- bch2_btree_update_add_new_node(as, n);
-
n->data->format = new_f;
btree_node_set_format(n, new_f);
@@ -1834,13 +1905,13 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_btree_build_aux_trees(n);
six_unlock_write(&n->c.lock);
+ bch2_btree_update_add_new_node(as, n);
+
new_path = get_unlocked_mut_path(trans, path->btree_id, n->c.level, n->key.k.p);
six_lock_increment(&n->c.lock, SIX_LOCK_intent);
mark_btree_node_locked(trans, new_path, n->c.level, SIX_LOCK_intent);
bch2_btree_path_level_init(trans, new_path, n);
- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
bkey_init(&delete.k);
delete.k.p = prev->key.k.p;
bch2_keylist_add(&as->parent_keys, &delete);
@@ -1848,11 +1919,14 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
bch2_trans_verify_paths(trans);
- bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys, flags);
+ if (ret)
+ goto err_free_update;
bch2_trans_verify_paths(trans);
bch2_btree_update_get_open_buckets(as, n);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, path, b);
bch2_btree_node_free_inmem(trans, sib_path, m);
@@ -1873,6 +1947,10 @@ err:
bch2_path_put(trans, sib_path, true);
bch2_trans_verify_locks(trans);
return ret;
+err_free_update:
+ bch2_btree_node_free_never_used(as, trans, n);
+ bch2_btree_update_free(as, trans);
+ goto out;
}
/**
@@ -1913,17 +1991,18 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
trace_and_count(c, btree_node_rewrite, c, b);
- bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
-
if (parent) {
bch2_keylist_add(&as->parent_keys, &n->key);
- bch2_btree_insert_node(as, trans, iter->path, parent,
- &as->parent_keys, flags);
+ ret = bch2_btree_insert_node(as, trans, iter->path, parent,
+ &as->parent_keys, flags);
+ if (ret)
+ goto err;
} else {
bch2_btree_set_root(as, trans, iter->path, n);
}
bch2_btree_update_get_open_buckets(as, n);
+ bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
bch2_btree_node_free_inmem(trans, iter->path, b);
@@ -1931,10 +2010,15 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
six_unlock_intent(&n->c.lock);
bch2_btree_update_done(as, trans);
- bch2_path_put(trans, new_path, true);
out:
+ if (new_path)
+ bch2_path_put(trans, new_path, true);
bch2_btree_path_downgrade(trans, iter->path);
return ret;
+err:
+ bch2_btree_node_free_never_used(as, trans, n);
+ bch2_btree_update_free(as, trans);
+ goto out;
}
struct async_btree_rewrite {
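A notable reshuffle in this file: sectors_written for a new btree_ptr_v2 node is now stamped in bch2_btree_update_add_new_node(), from the bytes already in the node, and node writes are deferred until after the parent insert can no longer fail, with bch2_btree_node_free_never_used() unwinding nodes on the new error paths. The sectors_written stamp is just block-rounding arithmetic; a worked sketch with hypothetical sizes:

#include <stdio.h>

int main(void)
{
	/* Hypothetical: 4000 bytes of node data, 4096-byte blocks. */
	unsigned bytes = 4000, block_bytes = 4096;

	/* round_up(bytes, block_bytes) >> 9, as in the hunk above: */
	unsigned sectors = (bytes + block_bytes - 1) / block_bytes
			   * block_bytes >> 9;

	printf("sectors_written = %u\n", sectors);	/* 8 */
	return 0;
}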
diff --git a/libbcachefs/btree_update_interior.h b/libbcachefs/btree_update_interior.h
index 7af810df..dabe8159 100644
--- a/libbcachefs/btree_update_interior.h
+++ b/libbcachefs/btree_update_interior.h
@@ -52,6 +52,7 @@ struct btree_update {
unsigned took_gc_lock:1;
enum btree_id btree_id;
+ unsigned update_level;
struct disk_reservation disk_res;
struct journal_preres journal_preres;
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 08d7001f..af3fbfcc 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -56,9 +56,9 @@ static inline bool same_leaf_as_next(struct btree_trans *trans,
insert_l(&i[0])->b == insert_l(&i[1])->b;
}
-static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
+inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
+ struct btree_path *path,
+ struct btree *b)
{
struct bch_fs *c = trans->c;
@@ -77,14 +77,6 @@ static inline void bch2_btree_node_prep_for_write(struct btree_trans *trans,
bch2_btree_init_next(trans, b);
}
-void bch2_btree_node_lock_for_insert(struct btree_trans *trans,
- struct btree_path *path,
- struct btree *b)
-{
- bch2_btree_node_lock_write_nofail(trans, path, &b->c);
- bch2_btree_node_prep_for_write(trans, path, b);
-}
-
/* Inserting into a given leaf node (last stage of insert): */
/* Handle overwrites and do insert, for non extents: */
@@ -1631,7 +1623,7 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
int ret = 0;
bch2_trans_iter_init(trans, &iter, id, start, BTREE_ITER_INTENT);
- while ((k = bch2_btree_iter_peek(&iter)).k) {
+ while ((k = bch2_btree_iter_peek_upto(&iter, bpos_predecessor(end))).k) {
struct disk_reservation disk_res =
bch2_disk_reservation_init(trans->c, 0);
struct bkey_i delete;
@@ -1640,9 +1632,6 @@ int bch2_btree_delete_range_trans(struct btree_trans *trans, enum btree_id id,
if (ret)
goto err;
- if (bkey_cmp(iter.pos, end) >= 0)
- break;
-
bkey_init(&delete.k);
/*
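This hunk is the change named in the commit title: instead of an unbounded bch2_btree_iter_peek() followed by a manual bkey_cmp(iter.pos, end) check, the loop bounds the iterator itself. peek_upto() takes an inclusive bound, while a delete range's end is exclusive, hence bpos_predecessor(end). A self-contained integer analogue of the pattern, with a toy sorted-array "btree":

#include <stdio.h>

/* Toy stand-in for a btree: returns the first key >= pos that is
 * also <= upto, or -1. Purely illustrative. */
static const int keys[] = { 1, 4, 9, 16, 25 };
static int peek_upto(int pos, int upto)
{
	for (unsigned i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
		if (keys[i] >= pos)
			return keys[i] <= upto ? keys[i] : -1;
	return -1;
}

int main(void)
{
	int start = 2, end = 17;	/* delete range [2, 17) */

	/* Inclusive bound, so pass end - 1: the analogue of
	 * bpos_predecessor(end) in the hunk above. */
	for (int k = peek_upto(start, end - 1); k >= 0;
	     k = peek_upto(k + 1, end - 1))
		printf("delete %d\n", k);	/* 4, 9, 16 */
	return 0;
}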
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 3102166d..5ef35e3b 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -328,8 +328,9 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
i = 0;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
- if (p.ptr.cached)
- m->data_opts.rewrite_ptrs &= ~(1U << i);
+ if (((1U << i) & m->data_opts.rewrite_ptrs) &&
+ p.ptr.cached)
+ BUG();
if (!((1U << i) & m->data_opts.rewrite_ptrs))
bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
@@ -365,5 +366,23 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
m->op.nr_replicas = m->op.nr_replicas_required =
hweight32(m->data_opts.rewrite_ptrs) + m->data_opts.extra_replicas;
+
+ BUG_ON(!m->op.nr_replicas);
return 0;
}
+
+void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const struct bch_extent_ptr *ptr;
+ unsigned i = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if ((opts->rewrite_ptrs & (1U << i)) && ptr->cached) {
+ opts->kill_ptrs |= 1U << i;
+ opts->rewrite_ptrs ^= 1U << i;
+ }
+
+ i++;
+ }
+}
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index e6450545..6793aa57 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -10,6 +10,7 @@ struct moving_context;
struct data_update_opts {
unsigned rewrite_ptrs;
+ unsigned kill_ptrs;
u16 target;
u8 extra_replicas;
unsigned btree_insert_flags;
@@ -34,5 +35,6 @@ int bch2_data_update_init(struct bch_fs *, struct data_update *,
struct write_point_specifier,
struct bch_io_opts, struct data_update_opts,
enum btree_id, struct bkey_s_c);
+void bch2_data_update_opts_normalize(struct bkey_s_c, struct data_update_opts *);
#endif /* _BCACHEFS_DATA_UPDATE_H */
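bch2_data_update_init() now BUG()s if asked to rewrite a cached pointer, so callers first run bch2_data_update_opts_normalize(), which turns "rewrite this cached pointer" into "kill this cached pointer": rewriting a cached copy is pointless, it can simply be dropped. The mask transformation, traced with hypothetical values:

#include <stdio.h>

int main(void)
{
	/* Hypothetical extent: 3 pointers, pointer 1 is cached. */
	unsigned rewrite_ptrs = 0x3;	/* caller wants ptrs 0 and 1 rewritten */
	unsigned kill_ptrs    = 0;
	unsigned cached_mask  = 0x2;

	unsigned moved = rewrite_ptrs & cached_mask;
	kill_ptrs    |= moved;	/* 0x2: drop the cached copy instead */
	rewrite_ptrs ^= moved;	/* 0x1: only ptr 0 is still rewritten */

	printf("rewrite=%#x kill=%#x\n", rewrite_ptrs, kill_ptrs);
	return 0;
}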
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index 1d2a1615..d87131f5 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -501,26 +501,6 @@ static const struct file_operations cached_btree_nodes_ops = {
.read = bch2_cached_btree_nodes_read,
};
-static int prt_backtrace(struct printbuf *out, struct task_struct *task)
-{
- unsigned long entries[32];
- unsigned i, nr_entries;
- int ret;
-
- ret = down_read_killable(&task->signal->exec_update_lock);
- if (ret)
- return ret;
-
- nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
- for (i = 0; i < nr_entries; i++) {
- prt_printf(out, "[<0>] %pB", (void *)entries[i]);
- prt_newline(out);
- }
-
- up_read(&task->signal->exec_update_lock);
- return 0;
-}
-
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
@@ -547,7 +527,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
prt_printf(&i->buf, "backtrace:");
prt_newline(&i->buf);
printbuf_indent_add(&i->buf, 2);
- prt_backtrace(&i->buf, trans->locking_wait.task);
+ bch2_prt_backtrace(&i->buf, trans->locking_wait.task);
printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index f902da01..d3fa2d7a 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -1403,10 +1403,8 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
int ret;
idx = get_existing_stripe(c, h);
- if (idx < 0) {
- bch_err(c, "failed to find an existing stripe");
+ if (idx < 0)
return -BCH_ERR_ENOSPC_stripe_reuse;
- }
h->s->have_existing_stripe = true;
ret = get_stripe_key(c, idx, &h->s->existing_stripe);
@@ -1444,21 +1442,9 @@ static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
struct ec_stripe_head *h)
{
- int ret;
-
- ret = bch2_disk_reservation_get(c, &h->s->res,
- h->blocksize,
- h->s->nr_parity, 0);
-
- if (ret) {
- /*
- * This means we need to wait for copygc to
- * empty out buckets from existing stripes:
- */
- bch_err_ratelimited(c, "failed to reserve stripe: %s", bch2_err_str(ret));
- }
-
- return ret;
+ return bch2_disk_reservation_get(c, &h->s->res,
+ h->blocksize,
+ h->s->nr_parity, 0);
}
struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
@@ -1500,8 +1486,10 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
ret = __bch2_ec_stripe_head_reserve(c, h);
if (ret && needs_stripe_new)
ret = __bch2_ec_stripe_head_reuse(c, h);
- if (ret)
+ if (ret) {
+ bch_err_ratelimited(c, "failed to get stripe: %s", bch2_err_str(ret));
goto err;
+ }
if (!h->s->allocated) {
ret = new_stripe_alloc_buckets(c, h, cl);
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index fc0bb5f8..9f293040 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -42,6 +42,7 @@
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_raced) \
x(BCH_ERR_transaction_restart, transaction_restart_key_cache_realloced)\
x(BCH_ERR_transaction_restart, transaction_restart_journal_preres_get) \
+ x(BCH_ERR_transaction_restart, transaction_restart_split_race) \
x(BCH_ERR_transaction_restart, transaction_restart_nested) \
x(0, no_btree_node) \
x(BCH_ERR_no_btree_node, no_btree_node_relock) \
diff --git a/libbcachefs/fs-io.c b/libbcachefs/fs-io.c
index 7d45f486..fdd43686 100644
--- a/libbcachefs/fs-io.c
+++ b/libbcachefs/fs-io.c
@@ -2208,6 +2208,9 @@ err:
/* inode->i_dio_count is our ref on inode and thus bch_fs */
inode_dio_end(&inode->v);
+ if (ret < 0)
+ ret = bch2_err_class(ret);
+
if (!sync) {
req->ki_complete(req, ret);
ret = -EIOCBQUEUED;
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e85c3143..4f4dfaa7 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -191,7 +191,52 @@ void bch_move_stats_init(struct bch_move_stats *stats, char *name)
scnprintf(stats->name, sizeof(stats->name), "%s", name);
}
+static int bch2_extent_drop_ptrs(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_s_c k,
+ struct data_update_opts data_opts)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_i *n;
+ int ret;
+
+ n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
+ ret = PTR_ERR_OR_ZERO(n);
+ if (ret)
+ return ret;
+
+ bkey_reassemble(n, k);
+
+ while (data_opts.kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ struct bch_extent_ptr *ptr;
+
+ bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+ data_opts.kill_ptrs ^= 1U << drop;
+ }
+
+ /*
+ * If the new extent no longer has any pointers, bch2_extent_normalize()
+ * will do the appropriate thing with it (turning it into a
+ * KEY_TYPE_error key, or just a discard if it was a cached extent)
+ */
+ bch2_extent_normalize(c, bkey_i_to_s(n));
+
+ /*
+ * Since we're not inserting through an extent iterator
+ * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+ * we aren't using the extent overwrite path to delete, we're
+ * just using the normal key deletion path:
+ */
+ if (bkey_deleted(&n->k))
+ n->k.size = 0;
+
+ return bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+ bch2_trans_commit(trans, NULL, NULL, BTREE_INSERT_NOFAIL);
+}
+
static int bch2_move_extent(struct btree_trans *trans,
+ struct btree_iter *iter,
struct moving_context *ctxt,
struct bch_io_opts io_opts,
enum btree_id btree_id,
@@ -206,6 +251,15 @@ static int bch2_move_extent(struct btree_trans *trans,
unsigned sectors = k.k->size, pages;
int ret = -ENOMEM;
+ bch2_data_update_opts_normalize(k, &data_opts);
+
+ if (!data_opts.rewrite_ptrs &&
+ !data_opts.extra_replicas) {
+ if (data_opts.kill_ptrs)
+ return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ return 0;
+ }
+
if (!percpu_ref_tryget_live(&c->writes))
return -EROFS;
@@ -447,7 +501,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- ret2 = bch2_move_extent(&trans, ctxt, io_opts,
+ ret2 = bch2_move_extent(&trans, &iter, ctxt, io_opts,
btree_id, k, data_opts);
if (ret2) {
if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
@@ -544,7 +598,7 @@ again:
prt_str(&buf, "failed to evacuate bucket ");
bch2_bkey_val_to_text(&buf, c, k);
- bch2_trans_inconsistent(trans, "%s", buf.buf);
+ bch_err(c, "%s", buf.buf);
printbuf_exit(&buf);
}
}
@@ -599,11 +653,12 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_iter_exit(&trans, &iter);
ret = move_get_io_opts(&trans, &io_opts, k, &cur_inum);
- if (ret)
+ if (ret) {
+ bch2_trans_iter_exit(&trans, &iter);
continue;
+ }
data_opts = _data_opts;
data_opts.target = io_opts.background_target;
@@ -615,8 +670,10 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
i++;
}
- ret = bch2_move_extent(&trans, ctxt, io_opts,
+ ret = bch2_move_extent(&trans, &iter, ctxt, io_opts,
bp.btree_id, k, data_opts);
+ bch2_trans_iter_exit(&trans, &iter);
+
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret == -ENOMEM) {
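bch2_extent_drop_ptrs() peels pointers off the key highest-index-first: __fls() finds the highest set bit in kill_ptrs, the matching pointer is dropped, and the bit is cleared. Going highest-first keeps the lower, still-pending indices stable as pointers are removed. A standalone trace of the bit loop, using a GCC builtin in place of the kernel's __fls():

#include <stdio.h>

int main(void)
{
	unsigned kill_ptrs = 0x5;	/* hypothetical: drop ptrs 2 and 0 */

	while (kill_ptrs) {
		/* index of the highest set bit, like the kernel's __fls() */
		unsigned drop = 31 - __builtin_clz(kill_ptrs);

		printf("drop ptr %u\n", drop);	/* prints 2, then 0 */
		kill_ptrs ^= 1U << drop;
	}
	return 0;
}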
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index a824e160..9df08289 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -1326,18 +1326,10 @@ static bool bch2_fs_may_start(struct bch_fs *c)
static void __bch2_dev_read_only(struct bch_fs *c, struct bch_dev *ca)
{
/*
- * Device going read only means the copygc reserve get smaller, so we
- * don't want that happening while copygc is in progress:
- */
- bch2_copygc_stop(c);
-
- /*
* The allocator thread itself allocates btree nodes, so stop it first:
*/
bch2_dev_allocator_remove(c, ca);
bch2_dev_journal_stop(&c->journal, ca);
-
- bch2_copygc_start(c);
}
static void __bch2_dev_read_write(struct bch_fs *c, struct bch_dev *ca)
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index 81befc43..d1919350 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -296,6 +296,26 @@ void bch2_print_string_as_lines(const char *prefix, const char *lines)
console_unlock();
}
+int bch2_prt_backtrace(struct printbuf *out, struct task_struct *task)
+{
+ unsigned long entries[32];
+ unsigned i, nr_entries;
+ int ret;
+
+ ret = down_read_killable(&task->signal->exec_update_lock);
+ if (ret)
+ return ret;
+
+ nr_entries = stack_trace_save_tsk(task, entries, ARRAY_SIZE(entries), 0);
+ for (i = 0; i < nr_entries; i++) {
+ prt_printf(out, "[<0>] %pB", (void *)entries[i]);
+ prt_newline(out);
+ }
+
+ up_read(&task->signal->exec_update_lock);
+ return 0;
+}
+
/* time stats: */
static void bch2_time_stats_update_one(struct time_stats *stats,
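With the helper moved out of debug.c and exported, both the debugfs transaction dump and the new nofail-deadlock report in btree_locking.c render a task's stack the same way. A sketch of the call shape, assuming libbcachefs' util.h and printbuf helpers; note that in the userspace build the stack_trace_save_tsk() stub in include/linux/sched.h returns 0 entries, so the dump is empty there:

#include "util.h"

static void dump_task_backtrace(struct printbuf *out,
				struct task_struct *task)
{
	prt_printf(out, "backtrace:");
	prt_newline(out);
	printbuf_indent_add(out, 2);
	bch2_prt_backtrace(out, task);	/* nonzero if interrupted; ignored here */
	printbuf_indent_sub(out, 2);
	prt_newline(out);
}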
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index aa8b416a..a7f68e17 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -356,6 +356,7 @@ u64 bch2_read_flag_list(char *, const char * const[]);
void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
void bch2_print_string_as_lines(const char *prefix, const char *lines);
+int bch2_prt_backtrace(struct printbuf *, struct task_struct *);
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
diff --git a/linux/kthread.c b/linux/kthread.c
index 41bfca2f..3c7bdb81 100644
--- a/linux/kthread.c
+++ b/linux/kthread.c
@@ -71,8 +71,10 @@ struct task_struct *kthread_create(int (*thread_fn)(void *data),
p->thread_fn = thread_fn;
p->thread_data = thread_data;
p->state = TASK_UNINTERRUPTIBLE;
+ p->signal = &p->_signal;
atomic_set(&p->usage, 1);
init_completion(&p->exited);
+ init_rwsem(&p->_signal.exec_update_lock);
pthread_attr_t attr;
pthread_attr_init(&attr);
diff --git a/linux/shrinker.c b/linux/shrinker.c
index 13f0c4b9..25cdfbb6 100644
--- a/linux/shrinker.c
+++ b/linux/shrinker.c
@@ -2,6 +2,7 @@
#include <stdio.h>
#include <linux/list.h>
+#include <linux/mm.h>
#include <linux/mutex.h>
#include <linux/shrinker.h>
@@ -39,30 +40,29 @@ static u64 parse_meminfo_line(const char *line)
return v << 10;
}
-static struct meminfo read_meminfo(void)
+void si_meminfo(struct sysinfo *val)
{
- struct meminfo ret = { 0 };
size_t len, n = 0;
char *line = NULL;
const char *v;
FILE *f;
+ memset(val, 0, sizeof(*val));
+
f = fopen("/proc/meminfo", "r");
if (!f)
- return ret;
+ return;
while ((len = getline(&line, &n, f)) != -1) {
if ((v = strcmp_prefix(line, "MemTotal:")))
- ret.total = parse_meminfo_line(v);
+ val->totalram = parse_meminfo_line(v);
if ((v = strcmp_prefix(line, "MemAvailable:")))
- ret.available = parse_meminfo_line(v);
+ val->freeram = parse_meminfo_line(v);
}
fclose(f);
free(line);
-
- return ret;
}
static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
@@ -85,7 +85,7 @@ static void run_shrinkers_allocation_failed(gfp_t gfp_mask)
void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
{
struct shrinker *shrinker;
- struct meminfo info;
+ struct sysinfo info;
s64 want_shrink;
/* Fast out if there are no shrinkers to run. */
@@ -97,10 +97,10 @@ void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
return;
}
- info = read_meminfo();
+ si_meminfo(&info);
- if (info.total && info.available) {
- want_shrink = (info.total >> 2) - info.available;
+ if (info.totalram && info.freeram) {
+ want_shrink = (info.totalram >> 2) - info.freeram;
if (want_shrink <= 0)
return;
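The shrink policy now reads directly off the sysinfo fields: keep free memory at a quarter of total, i.e. want_shrink = totalram/4 - freeram, and do nothing when that is non-positive. Worked numbers, hypothetical:

#include <stdio.h>

int main(void)
{
	/* Hypothetical: 16 GiB total, 3 GiB currently available. */
	long long totalram = 16LL << 30, freeram = 3LL << 30;

	/* (info.totalram >> 2) - info.freeram from the hunk above: */
	long long want_shrink = (totalram >> 2) - freeram;

	/* 4 GiB - 3 GiB = 1 GiB to shrink; <= 0 would skip entirely. */
	printf("want_shrink = %lld bytes\n", want_shrink);
	return 0;
}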