-rw-r--r--  .bcachefs_revision                    2
-rw-r--r--  libbcachefs/bcachefs.h               17
-rw-r--r--  libbcachefs/btree_cache.c             4
-rw-r--r--  libbcachefs/btree_io.c                2
-rw-r--r--  libbcachefs/btree_iter.c             66
-rw-r--r--  libbcachefs/btree_iter.h              6
-rw-r--r--  libbcachefs/btree_trans_commit.c      2
-rw-r--r--  libbcachefs/btree_update_interior.c  14
-rw-r--r--  libbcachefs/data_update.c             9
-rw-r--r--  libbcachefs/data_update.h             1
-rw-r--r--  libbcachefs/extents.c                17
-rw-r--r--  libbcachefs/extents.h                 1
-rw-r--r--  libbcachefs/fs.c                     10
-rw-r--r--  libbcachefs/move.c                   16
-rw-r--r--  libbcachefs/rebalance.c              93
-rw-r--r--  libbcachefs/sb-counters_format.h      3
16 files changed, 179 insertions, 84 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 9ba5b364..abe206fd 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-62ab4bbc52902916e1f22b642968a09deb9c1a23
+933c0b52a810e410c1c871dacaaaa0f6a5d67f62
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index cdf593c5..16d08dfb 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -386,14 +386,6 @@ do { \
 	     ##__VA_ARGS__, bch2_err_str(_ret));			\
 } while (0)
 
-static inline int __bch2_err_trace(struct bch_fs *c, int err)
-{
-	trace_error_throw(c, err, _THIS_IP_);
-	return err;
-}
-
-#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err)
-
 /* Parameters that are useful for debugging, but should always be compiled in: */
 #define BCH_DEBUG_PARAMS_ALWAYS()					\
 	BCH_DEBUG_PARAM(key_merging_disabled,				\
@@ -1153,6 +1145,15 @@ struct bch_fs {
 	struct mutex		fsck_error_counts_lock;
 };
 
+static inline int __bch2_err_trace(struct bch_fs *c, int err)
+{
+	this_cpu_inc(c->counters[BCH_COUNTER_error_throw]);
+	trace_error_throw(c, err, _THIS_IP_);
+	return err;
+}
+
+#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err)
+
 extern struct wait_queue_head bch2_read_only_wait;
 
 static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
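The error-throw helper moves below the definition of struct bch_fs because it now dereferences c->counters; every bch_err_throw() site thus bumps the new persistent error_throw counter as well as firing the existing tracepoint. A minimal sketch of what a call site does after this change (example_reserve() is hypothetical; the error name, counter, and tracepoint are from this diff):

    /* Hypothetical caller, assuming bcachefs headers: */
    static int example_reserve(struct bch_fs *c, unsigned nr_free, unsigned reserved)
    {
        if (nr_free <= reserved)
            /* this_cpu_inc(c->counters[BCH_COUNTER_error_throw]),
             * trace_error_throw(c, -BCH_ERR_open_buckets_empty, _THIS_IP_),
             * then returns -BCH_ERR_open_buckets_empty */
            return bch_err_throw(c, open_buckets_empty);
        return 0;
    }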
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index 9261ad04..3b1d694d 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -804,7 +804,7 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
 		goto got_node;
 	}
 
-	b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
+	b = __btree_node_mem_alloc(c, GFP_NOWAIT);
 	if (b) {
 		bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
 	} else {
@@ -842,7 +842,7 @@ got_node:
 
 	mutex_unlock(&bc->lock);
 
-	if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) {
+	if (btree_node_data_alloc(c, b, GFP_NOWAIT)) {
 		bch2_trans_unlock(trans);
 		if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN))
 			goto err;
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index 8a03cd75..276cf088 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -131,7 +131,7 @@ static void *btree_bounce_alloc(struct bch_fs *c, size_t size,
 	BUG_ON(size > c->opts.btree_node_size);
 
 	*used_mempool = false;
-	p = kvmalloc(size, __GFP_NOWARN|GFP_NOWAIT);
+	p = kvmalloc(size, GFP_NOWAIT);
 	if (!p) {
 		*used_mempool = true;
 		p = mempool_alloc(&c->btree_bounce_pool, GFP_NOFS);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 1f30326f..8962c481 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2450,10 +2450,27 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
 				continue;
 			}
 
-			if (bkey_extent_whiteout(k.k) &&
-			    !(iter->flags & BTREE_ITER_nofilter_whiteouts)) {
-				search_key = bkey_successor(iter, k.k->p);
-				continue;
+			if (!(iter->flags & BTREE_ITER_nofilter_whiteouts)) {
+				/*
+				 * KEY_TYPE_extent_whiteout indicates that there
+				 * are no extents that overlap with this
+				 * whiteout - meaning bkey_start_pos() is
+				 * monotonically increasing when including
+				 * KEY_TYPE_extent_whiteout (not
+				 * KEY_TYPE_whiteout).
+				 *
+				 * Without this @end wouldn't be able to
+				 * terminate searches and we'd have to scan
+				 * through tons of whiteouts:
+				 */
+				if (k.k->type == KEY_TYPE_extent_whiteout &&
+				    bkey_ge(k.k->p, end))
+					goto end;
+
+				if (bkey_extent_whiteout(k.k)) {
+					search_key = bkey_successor(iter, k.k->p);
+					continue;
+				}
 			}
 		}
 
@@ -2878,31 +2895,40 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
 	EBUG_ON(btree_iter_path(trans, iter)->level);
 
-	if (iter->flags & BTREE_ITER_intent) {
-		struct btree_iter iter2;
+	struct btree_iter iter2;
 
-		bch2_trans_copy_iter(&iter2, iter);
-		k = bch2_btree_iter_peek_max(&iter2, end);
+	bch2_trans_copy_iter(&iter2, iter);
+	iter2.flags |= BTREE_ITER_nofilter_whiteouts;
 
-		if (k.k && !bkey_err(k)) {
-			swap(iter->key_cache_path, iter2.key_cache_path);
-			iter->k = iter2.k;
-			k.k = &iter->k;
+	while (1) {
+		k = bch2_btree_iter_peek_max(&iter2, end);
+		if ((iter2.flags & BTREE_ITER_is_extents) &&
+		    k.k &&
+		    !bkey_err(k) &&
+		    k.k->type == KEY_TYPE_whiteout) {
+			bch2_btree_iter_set_pos(&iter2, k.k->p);
+			continue;
 		}
 
-		bch2_trans_iter_exit(&iter2);
-	} else {
-		struct bpos pos = iter->pos;
-
-		k = bch2_btree_iter_peek_max(iter, end);
-		if (unlikely(bkey_err(k)))
-			bch2_btree_iter_set_pos(iter, pos);
-		else
-			iter->pos = pos;
+		break;
+	}
+
+	if (k.k && !bkey_err(k)) {
+		swap(iter->key_cache_path, iter2.key_cache_path);
+		iter->k = iter2.k;
+		k.k = &iter->k;
 	}
+	bch2_trans_iter_exit(&iter2);
 
 	if (unlikely(bkey_err(k)))
 		goto out;
 
+	if (unlikely(k.k &&
+		     bkey_extent_whiteout(k.k) &&
+		     (iter->flags & BTREE_ITER_filter_snapshots) &&
+		     !(iter->flags & BTREE_ITER_nofilter_whiteouts)))
+		iter->k.type = KEY_TYPE_deleted;
+
 	next = k.k ? bkey_start_pos(k.k) : POS_MAX;
 
 	if (bkey_lt(iter->pos, next)) {
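The comment's argument, restated: KEY_TYPE_extent_whiteout promises that no extent overlaps it, so start positions stay monotonic once those whiteouts are included, and the first one at or past @end proves nothing relevant remains. A standalone toy model of that early-termination logic (plain C with illustrative types — not bcachefs code):

    #include <stddef.h>
    #include <stdint.h>

    enum toy_type { TOY_EXTENT, TOY_WHITEOUT, TOY_EXTENT_WHITEOUT };
    struct toy_key { uint64_t start, end; enum toy_type type; };

    /* Keys sorted by end position; extent whiteouts overlap nothing. */
    static const struct toy_key *toy_peek(const struct toy_key *keys, size_t nr,
                                          uint64_t search_end)
    {
        for (size_t i = 0; i < nr; i++) {
            const struct toy_key *k = &keys[i];

            /* Because start positions are non-decreasing across extent
             * whiteouts, one ending at or past search_end means every
             * later extent also starts at or past it - stop here instead
             * of stepping over whiteouts one by one: */
            if (k->type == TOY_EXTENT_WHITEOUT && k->end >= search_end)
                return NULL;
            if (k->type != TOY_EXTENT)
                continue;       /* filter whiteouts */
            if (k->start >= search_end)
                return NULL;
            return k;
        }
        return NULL;
    }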
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index b117cb5d..c8fc6ee0 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -954,7 +954,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
 
 #define allocate_dropping_locks_errcode(_trans, _do)			\
 ({									\
-	gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;				\
+	gfp_t _gfp = GFP_NOWAIT;					\
 	int _ret = _do;							\
 									\
 	if (bch2_err_matches(_ret, ENOMEM)) {				\
@@ -966,7 +966,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
 
 #define allocate_dropping_locks(_trans, _ret, _do)			\
 ({									\
-	gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;				\
+	gfp_t _gfp = GFP_NOWAIT;					\
 	typeof(_do) _p = _do;						\
 									\
 	_ret = 0;							\
@@ -979,7 +979,7 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
 
 #define allocate_dropping_locks_norelock(_trans, _lock_dropped, _do)	\
 ({									\
-	gfp_t _gfp = GFP_NOWAIT|__GFP_NOWARN;				\
+	gfp_t _gfp = GFP_NOWAIT;					\
 	typeof(_do) _p = _do;						\
 	_lock_dropped = false;						\
 	if (unlikely(!_p)) {						\
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 8b94a815..4d58bdb2 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -449,7 +449,7 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
 		return 0;
 
 	new_u64s	= roundup_pow_of_two(u64s);
-	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
+	new_k		= krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOWAIT);
 	if (unlikely(!new_k))
 		return btree_key_can_insert_cached_slowpath(trans, flags, path, new_u64s);
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index 76897cf1..65ca54c5 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -336,6 +336,20 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
 	BUG_ON(b->ob.nr);
 
 	mutex_lock(&c->btree_reserve_cache_lock);
+	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
+		guard(spinlock)(&c->freelist_lock);
+		if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) {
+			if (cl)
+				closure_wait(&c->open_buckets_wait, cl);
+
+			ret = cl
+				? bch_err_throw(c, bucket_alloc_blocked)
+				: bch_err_throw(c, open_buckets_empty);
+			mutex_unlock(&c->btree_reserve_cache_lock);
+			goto err;
+		}
+	}
+
 	if (c->btree_reserve_cache_nr > nr_reserve) {
 		for (struct btree_alloc *a = c->btree_reserve_cache;
 		     a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) {
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 01838a3a..0bd4dd06 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -693,6 +693,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
+	unsigned i = 0;
+	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+		if (data_opts->kill_ec_ptrs & BIT(i))
+			bch2_bkey_drop_ec(n, p.ptr.dev);
+		i++;
+	}
+
 	while (data_opts->kill_ptrs) {
 		unsigned i = 0, drop = __fls(data_opts->kill_ptrs);
 
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 5e14d135..fc12aa65 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -12,6 +12,7 @@ struct moving_context;
 struct data_update_opts {
 	unsigned	rewrite_ptrs;
 	unsigned	kill_ptrs;
+	unsigned	kill_ec_ptrs;
 	u16		target;
 	u8		extra_replicas;
 	unsigned	btree_insert_flags;
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index b879a586..7ab03987 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -995,6 +995,22 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
 	bch2_bkey_drop_ptrs_noerror(k, ptr, ptr->dev == dev);
 }
 
+void bch2_bkey_drop_ec(struct bkey_i *k, unsigned dev)
+{
+	struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+	union bch_extent_entry *entry, *ec = NULL;
+
+	bkey_extent_entry_for_each(ptrs, entry) {
+		if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr)
+			ec = entry;
+		else if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_ptr &&
+			 entry->ptr.dev == dev) {
+			bch2_bkey_extent_entry_drop(k, ec);
+			return;
+		}
+	}
+}
+
 const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -1757,3 +1773,4 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
 	memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
 	return -val_u64s_delta;
 }
+
diff --git a/libbcachefs/extents.h b/libbcachefs/extents.h
index 35ee03cd..f6dcb171 100644
--- a/libbcachefs/extents.h
+++ b/libbcachefs/extents.h
@@ -650,6 +650,7 @@ void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
 
 void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
 void bch2_bkey_drop_device(struct bkey_s, unsigned);
+void bch2_bkey_drop_ec(struct bkey_i *k, unsigned);
 
 #define bch2_bkey_drop_ptrs_noerror(_k, _ptr, _cond)			\
 do {									\
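bch2_bkey_drop_ec() removes only the stripe entry that precedes a device's pointer in the extent, leaving the data pointer itself in place: the extent keeps its replica but loses its erasure-coding redundancy. A hedged sketch of how a caller might drive it (the wrapper function is hypothetical; the loop mirrors the new code in bch2_extent_drop_ptrs()):

    /* Drop the EC/stripe entry for each pointer flagged in a bitmask.
     * 'n' is the mutable copy of the extent 'k' being rewritten. */
    static void example_drop_flagged_ec(struct bkey_i *n, struct bkey_s_c k,
                                        unsigned kill_ec_ptrs)
    {
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;
        unsigned i = 0;

        bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
            if (kill_ec_ptrs & BIT(i))
                bch2_bkey_drop_ec(n, p.ptr.dev);
            i++;
        }
    }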
diff --git a/libbcachefs/fs.c b/libbcachefs/fs.c
index b5e3090f..52722a5e 100644
--- a/libbcachefs/fs.c
+++ b/libbcachefs/fs.c
@@ -268,7 +268,7 @@ restart:
 	rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
 		if (inode->ei_inum.inum == inum) {
 			ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
-					      GFP_NOWAIT|__GFP_NOWARN);
+					      GFP_NOWAIT);
 			if (ret) {
 				rcu_read_unlock();
 				ret = darray_make_room(&subvols, 1);
@@ -826,14 +826,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
 
 	bch2_inode_update_after_write(trans, inode, &inode_u,
 				      ATTR_MTIME);
 
-	if (inode_u.bi_subvol) {
-		/*
-		 * Subvolume deletion is asynchronous, but we still want to tell
-		 * the VFS that it's been deleted here:
-		 */
-		set_nlink(&inode->v, 0);
-	}
-
 	if (IS_CASEFOLDED(vdir))
 		d_invalidate(dentry);
 err:
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 30fe269d..932b62a9 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -344,7 +344,7 @@ int bch2_move_extent(struct moving_context *ctxt,
 	if (!data_opts.rewrite_ptrs &&
 	    !data_opts.extra_replicas &&
 	    !data_opts.scrub) {
-		if (data_opts.kill_ptrs) {
+		if (data_opts.kill_ptrs|data_opts.kill_ec_ptrs) {
 			this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size);
 			return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
 		} else {
@@ -542,7 +542,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
 
 	if (ctxt->wait_on_copygc && c->copygc_running) {
 		bch2_moving_ctxt_flush_all(ctxt);
-		wait_event_killable(c->copygc_running_wq,
+		wait_event_freezable(c->copygc_running_wq,
 				    !c->copygc_running ||
 				    (is_kthread && kthread_should_stop()));
 	}
@@ -1280,7 +1280,17 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
 		i++;
 	}
 
-	return data_opts->kill_ptrs != 0;
+	i = 0;
+	bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+		if (p.has_ec && durability - p.ec.redundancy >= replicas) {
+			data_opts->kill_ec_ptrs |= BIT(i);
+			durability -= p.ec.redundancy;
+		}
+
+		i++;
+	}
+
+	return (data_opts->kill_ptrs|data_opts->kill_ec_ptrs) != 0;
 }
 
 static bool scrub_pred(struct bch_fs *c, void *_arg,
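The new second pass in drop_extra_replicas_pred() prefers stripping erasure-coding redundancy over dropping whole pointers: an EC pointer contributes p.ec.redundancy on top of its device durability, so when total durability minus that redundancy still meets the replicas target, only the stripe entry is marked for removal. A small standalone example of the accounting (illustrative numbers, not bcachefs code):

    #include <stdio.h>

    int main(void)
    {
        unsigned replicas   = 2;    /* configured durability target */
        unsigned durability = 4;    /* e.g. two plain ptrs + one EC ptr */
        unsigned redundancy = 2;    /* what the stripe adds for that ptr */

        /* Same test the predicate applies per EC pointer: */
        if (durability - redundancy >= replicas) {
            durability -= redundancy;   /* drop stripe entry, keep ptr */
            printf("EC entry dropped, durability now %u\n", durability);
        }
        return 0;
    }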
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index c0c5fe96..17ca56b0 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -292,12 +292,48 @@ static int bch2_clear_rebalance_needs_scan(struct btree_trans *trans, u64 inum,
 		: 0;
 }
 
-static struct bkey_s_c next_rebalance_entry(struct btree_trans *trans,
-					    struct btree_iter *work_iter)
+#define REBALANCE_WORK_BUF_NR	1024
+DEFINE_DARRAY_NAMED(darray_rebalance_work, struct bkey_i_cookie);
+
+static struct bkey_i *next_rebalance_entry(struct btree_trans *trans,
+					   darray_rebalance_work *buf, struct bpos *work_pos)
 {
-	return !kthread_should_stop()
-		? bch2_btree_iter_peek(work_iter)
-		: bkey_s_c_null;
+	if (unlikely(!buf->nr)) {
+		/*
+		 * Avoid contention with write buffer flush: buffer up rebalance
+		 * work entries in a darray
+		 */
+
+		BUG_ON(!buf->size);
+
+		bch2_trans_begin(trans);
+
+		for_each_btree_key(trans, iter, BTREE_ID_rebalance_work, *work_pos,
+				   BTREE_ITER_all_snapshots|BTREE_ITER_prefetch, k, ({
+			/* we previously used darray_make_room */
+			BUG_ON(bkey_bytes(k.k) > sizeof(buf->data[0]));
+
+			bkey_reassemble(&darray_top(*buf).k_i, k);
+			buf->nr++;
+
+			*work_pos = bpos_successor(iter.pos);
+			if (buf->nr == buf->size)
+				break;
+			0;
+		}));
+
+		if (!buf->nr)
+			return NULL;
+
+		unsigned l = 0, r = buf->nr - 1;
+		while (l < r) {
+			swap(buf->data[l], buf->data[r]);
+			l++;
+			--r;
+		}
+	}
+
+	return &(&darray_pop(buf))->k_i;
 }
 
 static int bch2_bkey_clear_needs_rebalance(struct btree_trans *trans,
@@ -464,10 +500,9 @@ static int do_rebalance_scan(struct moving_context *ctxt, u64 inum, u64 cookie)
 	per_snapshot_io_opts_init(&snapshot_io_opts, c);
 
 	int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents,
-			     r->scan_start.pos, r->scan_end.pos,
-			     BTREE_ITER_all_snapshots|
-			     BTREE_ITER_not_extents|
-			     BTREE_ITER_prefetch, k, ({
+				  r->scan_start.pos, r->scan_end.pos,
+				  BTREE_ITER_all_snapshots|
+				  BTREE_ITER_prefetch, k, ({
 		ctxt->stats->pos = BBPOS(iter.btree_id, iter.pos);
 
 		struct bch_io_opts *io_opts = bch2_move_get_io_opts(trans,
@@ -524,49 +559,37 @@ static int do_rebalance(struct moving_context *ctxt)
 	struct btree_trans *trans = ctxt->trans;
 	struct bch_fs *c = trans->c;
 	struct bch_fs_rebalance *r = &c->rebalance;
-	struct btree_iter extent_iter = { NULL };
-	struct bkey_s_c k;
+	struct btree_iter extent_iter = {};
 	u32 kick = r->kick;
-	int ret = 0;
 
-	bch2_trans_begin(trans);
+	struct bpos work_pos = POS_MIN;
+	CLASS(darray_rebalance_work, work)();
+	int ret = darray_make_room(&work, REBALANCE_WORK_BUF_NR);
+	if (ret)
+		return ret;
 
 	bch2_move_stats_init(&r->work_stats, "rebalance_work");
 	bch2_move_stats_init(&r->scan_stats, "rebalance_scan");
 
-	CLASS(btree_iter, rebalance_work_iter)(trans,
-			BTREE_ID_rebalance_work, POS_MIN,
-			BTREE_ITER_all_snapshots);
-
 	while (!bch2_move_ratelimit(ctxt)) {
 		if (!bch2_rebalance_enabled(c)) {
 			bch2_moving_ctxt_flush_all(ctxt);
 			kthread_wait_freezable(bch2_rebalance_enabled(c) ||
 					       kthread_should_stop());
+			if (kthread_should_stop())
+				break;
 		}
 
-		if (kthread_should_stop())
+		struct bkey_i *k = next_rebalance_entry(trans, &work, &work_pos);
+		if (!k)
 			break;
 
-		bch2_trans_begin(trans);
-
-		ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));
-		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-			continue;
-		if (ret || !k.k)
-			break;
-
-		ret = k.k->type == KEY_TYPE_cookie
-			? do_rebalance_scan(ctxt, k.k->p.inode,
-					    le64_to_cpu(bkey_s_c_to_cookie(k).v->cookie))
-			: do_rebalance_extent(ctxt, k.k->p, &extent_iter);
-
-		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-			continue;
+		ret = k->k.type == KEY_TYPE_cookie
+			? do_rebalance_scan(ctxt, k->k.p.inode,
+					    le64_to_cpu(bkey_i_to_cookie(k)->v.cookie))
+			: lockrestart_do(trans, do_rebalance_extent(ctxt, k->k.p, &extent_iter));
 		if (ret)
 			break;
-
-		bch2_btree_iter_advance(&rebalance_work_iter);
 	}
 
 	bch2_trans_iter_exit(&extent_iter);
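next_rebalance_entry() now refills a fixed-size batch from BTREE_ID_rebalance_work in one transaction and then serves entries out of the buffer, so the rebalance thread isn't holding a btree iterator open that contends with write buffer flushes. Since darray_pop() takes from the tail, the buffer is reversed after filling so entries pop in ascending bpos order. A standalone sketch of that refill-reverse-pop pattern (generic C, not the bcachefs darray API):

    #include <stddef.h>

    #define BUF_NR 1024

    struct work_buf { int data[BUF_NR]; size_t nr; };

    /* Refill from 'src' (sorted ascending), then reverse in place so
     * popping from the tail yields items in ascending order. */
    static void refill(struct work_buf *b, const int *src, size_t n)
    {
        for (b->nr = 0; b->nr < n && b->nr < BUF_NR; b->nr++)
            b->data[b->nr] = src[b->nr];

        if (b->nr < 2)
            return;

        for (size_t l = 0, r = b->nr - 1; l < r; l++, r--) {
            int tmp = b->data[l];
            b->data[l] = b->data[r];
            b->data[r] = tmp;
        }
    }

    /* Caller must check b->nr != 0 first, as do_rebalance() does. */
    static int pop(struct work_buf *b)
    {
        return b->data[--b->nr];
    }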
diff --git a/libbcachefs/sb-counters_format.h b/libbcachefs/sb-counters_format.h
index f3ea53a5..740859c7 100644
--- a/libbcachefs/sb-counters_format.h
+++ b/libbcachefs/sb-counters_format.h
@@ -101,7 +101,8 @@ enum counters_flags {
 	x(trans_restart_write_buffer_flush,	75,	TYPE_COUNTER)	\
 	x(trans_restart_split_race,		76,	TYPE_COUNTER)	\
 	x(write_buffer_flush_slowpath,		77,	TYPE_COUNTER)	\
-	x(write_buffer_flush_sync,		78,	TYPE_COUNTER)
+	x(write_buffer_flush_sync,		78,	TYPE_COUNTER)	\
+	x(error_throw,				93,	TYPE_COUNTER)
 
 enum bch_persistent_counters {
 #define x(t, n, ...)	BCH_COUNTER_##t,