Diffstat (limited to 'fs/bcachefs/alloc_foreground.c')
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 284
1 file changed, 162 insertions, 122 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 1ecd635852a9..23a9fbb36f49 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -206,7 +206,8 @@ static inline bool may_alloc_bucket(struct bch_fs *c,
 
 static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
 					      struct alloc_request *req,
-					      u64 bucket, u8 gen)
+					      u64 bucket, u8 gen,
+					      struct closure *cl)
 {
 	struct bch_dev *ca = req->ca;
 
@@ -221,18 +222,12 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
 	spin_lock(&c->freelist_lock);
 
 	if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) {
-		track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
-
-		int ret;
-		if (req->cl && !(req->flags & BCH_WRITE_alloc_nowait)) {
-			closure_wait(&c->open_buckets_wait, req->cl);
-			ret = bch_err_throw(c, open_bucket_alloc_blocked);
-		} else {
-			ret = bch_err_throw(c, open_buckets_empty);
-		}
+		if (cl)
+			closure_wait(&c->open_buckets_wait, cl);
+		track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true);
 
 		spin_unlock(&c->freelist_lock);
-		return ERR_PTR(ret);
+		return ERR_PTR(bch_err_throw(c, open_buckets_empty));
 	}
 
 	/* Recheck under lock: */
@@ -264,7 +259,8 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c,
 
 static struct open_bucket *try_alloc_bucket(struct btree_trans *trans,
 					    struct alloc_request *req,
-					    struct btree_iter *freespace_iter)
+					    struct btree_iter *freespace_iter,
+					    struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
 	u64 b = freespace_iter->pos.offset & ~(~0ULL << 56);
@@ -279,7 +275,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans,
 	if (ret)
 		return NULL;
 
-	return __try_alloc_bucket(c, req, b, gen);
+	return __try_alloc_bucket(c, req, b, gen, cl);
 }
 
 /*
@@ -287,7 +283,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans,
  */
 static noinline struct open_bucket *
 bch2_bucket_alloc_early(struct btree_trans *trans,
-			struct alloc_request *req)
+			struct alloc_request *req,
+			struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = req->ca;
@@ -351,7 +348,7 @@ again:
 			req->counters.buckets_seen++;
 
 			ob = may_alloc_bucket(c, req, k.k->p)
-				? __try_alloc_bucket(c, req, k.k->p.offset, a->gen)
+				? __try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl)
 				: NULL;
 next:
 			bch2_set_btree_iter_dontneed(trans, &citer);
@@ -377,7 +374,8 @@ next:
 }
 
 static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
-						      struct alloc_request *req)
+						      struct alloc_request *req,
+						      struct closure *cl)
 {
 	struct bch_dev *ca = req->ca;
 	struct btree_iter iter;
@@ -419,7 +417,7 @@ again:
 			goto next;
 		}
 
-		ob = try_alloc_bucket(trans, req, &iter);
+		ob = try_alloc_bucket(trans, req, &iter, cl);
 		if (ob) {
 			if (!IS_ERR(ob))
 				*dev_alloc_cursor = iter.pos.offset;
@@ -452,6 +450,7 @@ fail:
 
 static noinline void trace_bucket_alloc2(struct bch_fs *c,
 					 struct alloc_request *req,
+					 struct closure *cl,
 					 struct open_bucket *ob)
 {
 	struct printbuf buf = PRINTBUF;
@@ -461,8 +460,7 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
 	prt_printf(&buf, "dev\t%s (%u)\n",	req->ca->name, req->ca->dev_idx);
 	prt_printf(&buf, "watermark\t%s\n",	bch2_watermarks[req->watermark]);
 	prt_printf(&buf, "data type\t%s\n",	__bch2_data_types[req->data_type]);
-	prt_printf(&buf, "blocking\t%u\n",	!req->will_retry_target_devices &&
-						!req->will_retry_all_devices);
+	prt_printf(&buf, "blocking\t%u\n",	cl != NULL);
 	prt_printf(&buf, "free\t%llu\n",	req->usage.buckets[BCH_DATA_free]);
 	prt_printf(&buf, "avail\t%llu\n",	dev_buckets_free(req->ca, req->usage, req->watermark));
 	prt_printf(&buf, "copygc_wait\t%llu/%lli\n",
@@ -490,23 +488,28 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c,
 * bch2_bucket_alloc_trans - allocate a single bucket from a specific device
 * @trans:	transaction object
 * @req:	state for the entire allocation
+ * @cl:		if not NULL, closure to be used to wait if buckets not available
+ * @nowait:	if true, do not wait for buckets to become available
 *
 * Returns:	an open_bucket on success, or an ERR_PTR() on failure.
 */
 static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
-						   struct alloc_request *req)
+						   struct alloc_request *req,
+						   struct closure *cl,
+						   bool nowait)
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = req->ca;
 	struct open_bucket *ob = NULL;
 	bool freespace = READ_ONCE(ca->mi.freespace_initialized);
-	bool waiting = false;
+	u64 avail;
+	bool waiting = nowait;
 
 	req->btree_bitmap = req->data_type == BCH_DATA_btree;
 	memset(&req->counters, 0, sizeof(req->counters));
again:
 	bch2_dev_usage_read_fast(ca, &req->usage);
-	u64 avail = dev_buckets_free(ca, req->usage, req->watermark);
+	avail = dev_buckets_free(ca, req->usage, req->watermark);
 
 	if (req->usage.buckets[BCH_DATA_need_discard] > avail)
 		bch2_dev_do_discards(ca);
@@ -522,12 +525,8 @@ again:
 		    c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations)
 			goto alloc;
 
-		if (!waiting &&
-		    req->cl &&
-		    !req->will_retry_target_devices &&
-		    !req->will_retry_all_devices &&
-		    !(req->flags & BCH_WRITE_alloc_nowait)) {
-			closure_wait(&c->freelist_wait, req->cl);
+		if (cl && !waiting) {
+			closure_wait(&c->freelist_wait, cl);
 			waiting = true;
 			goto again;
 		}
@@ -542,8 +541,8 @@ again:
 		closure_wake_up(&c->freelist_wait);
alloc:
 	ob = likely(freespace)
-		? bch2_bucket_alloc_freelist(trans, req)
-		: bch2_bucket_alloc_early(trans, req);
+		? bch2_bucket_alloc_freelist(trans, req, cl)
+		: bch2_bucket_alloc_early(trans, req, cl);
 
 	if (req->counters.need_journal_commit * 2 > avail)
 		bch2_journal_flush_async(&c->journal, NULL);
@@ -572,7 +571,7 @@ err:
 	if (!IS_ERR(ob)
 	    ? trace_bucket_alloc_enabled()
 	    : trace_bucket_alloc_fail_enabled())
-		trace_bucket_alloc2(c, req, ob);
+		trace_bucket_alloc2(c, req, cl, ob);
 
 	return ob;
 }
@@ -584,14 +583,13 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct open_bucket *ob;
 	struct alloc_request req = {
-		.cl		= cl,
 		.watermark	= watermark,
 		.data_type	= data_type,
 		.ca		= ca,
 	};
 
 	bch2_trans_do(c,
-		PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req)));
+		PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false)));
 	return ob;
 }
 
@@ -705,26 +703,18 @@ static int add_new_bucket(struct bch_fs *c,
 	return 0;
 }
 
-int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
-				struct alloc_request *req,
-				struct dev_stripe_state *stripe)
+inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
+				       struct alloc_request *req,
+				       struct dev_stripe_state *stripe,
+				       struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
-	struct closure *cl = NULL;
 	int ret = 0;
 
 	BUG_ON(req->nr_effective >= req->nr_replicas);
 
-	/*
-	 * Try nonblocking first, so that if one device is full we'll try from
-	 * other devices:
-	 */
-retry_blocking:
 	bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc, &req->devs_sorted);
 
-	if (req->devs_sorted.nr == 1)
-		req->will_retry_target_devices = false;
-
 	darray_for_each(req->devs_sorted, i) {
 		req->ca = bch2_dev_tryget_noerror(c, *i);
 		if (!req->ca)
@@ -735,7 +725,8 @@ retry_blocking:
 			continue;
 		}
 
-		struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req);
+		struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req, cl,
+					req->flags & BCH_WRITE_alloc_nowait);
 		if (!IS_ERR(ob))
 			bch2_dev_stripe_increment_inlined(req->ca, stripe, &req->usage);
 		bch2_dev_put(req->ca);
@@ -754,14 +745,6 @@ retry_blocking:
 
 	if (ret == 1)
 		return 0;
-
-	if (ret &&
-	    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
-	    req->will_retry_target_devices) {
-		req->will_retry_target_devices = false;
-		goto retry_blocking;
-	}
-
 	if (ret)
 		return ret;
 	return bch_err_throw(c, insufficient_devices);
@@ -776,13 +759,20 @@ retry_blocking:
 */
 static int bucket_alloc_from_stripe(struct btree_trans *trans,
-				    struct alloc_request *req)
+				    struct alloc_request *req,
+				    struct closure *cl)
 {
 	struct bch_fs *c = trans->c;
 	int ret = 0;
 
+	if (req->nr_replicas < 2)
+		return 0;
+
+	if (ec_open_bucket(c, &req->ptrs))
+		return 0;
+
 	struct ec_stripe_head *h =
-		bch2_ec_stripe_head_get(trans, req, 0);
+		bch2_ec_stripe_head_get(trans, req, 0, cl);
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 	if (!h)
@@ -897,6 +887,79 @@ unlock:
 	return ret;
 }
 
+static int __open_bucket_add_buckets(struct btree_trans *trans,
+				     struct alloc_request *req,
+				     struct closure *_cl)
+{
+	struct bch_fs *c = trans->c;
+	struct open_bucket *ob;
+	struct closure *cl = NULL;
+	unsigned i;
+	int ret;
+
+	req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target);
+
+	/* Don't allocate from devices we already have pointers to: */
+	darray_for_each(*req->devs_have, i)
+		__clear_bit(*i, req->devs_may_alloc.d);
+
+	open_bucket_for_each(c, &req->ptrs, ob, i)
+		__clear_bit(ob->dev, req->devs_may_alloc.d);
+
+	ret = bucket_alloc_set_writepoint(c, req);
+	if (ret)
+		return ret;
+
+	ret = bucket_alloc_set_partial(c, req);
+	if (ret)
+		return ret;
+
+	if (req->ec) {
+		ret = bucket_alloc_from_stripe(trans, req, _cl);
+	} else {
+retry_blocking:
+		/*
+		 * Try nonblocking first, so that if one device is full we'll try from
+		 * other devices:
+		 */
+		ret = bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl);
+		if (ret &&
+		    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
+		    !bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
+		    !cl && _cl) {
+			cl = _cl;
+			goto retry_blocking;
+		}
+	}
+
+	return ret;
+}
+
+static int open_bucket_add_buckets(struct btree_trans *trans,
+				   struct alloc_request *req,
+				   struct closure *cl)
+{
+	int ret;
+
+	if (req->ec && !ec_open_bucket(trans->c, &req->ptrs)) {
+		ret = __open_bucket_add_buckets(trans, req, cl);
+		if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+		    bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
+		    bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+		    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
+			return ret;
+		if (req->nr_effective >= req->nr_replicas)
+			return 0;
+	}
+
+	bool ec = false;
+	swap(ec, req->ec);
+	ret = __open_bucket_add_buckets(trans, req, cl);
+	swap(ec, req->ec);
+
+	return ret < 0 ? ret : 0;
+}
+
 /**
 * should_drop_bucket - check if this is open_bucket should go away
 * @ob:	open_bucket to predicate on
@@ -1192,95 +1255,72 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
 		erasure_code = false;
 
-	if (nr_replicas < 2)
-		erasure_code = false;
-
-	req->cl			= cl;
 	req->nr_replicas	= nr_replicas;
 	req->target		= target;
+	req->ec			= erasure_code;
 	req->watermark		= watermark;
 	req->flags		= flags;
 	req->devs_have		= devs_have;
 
 	BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
-	req->ec				= erasure_code;
-	req->will_retry_target_devices	= true;
-	req->will_retry_all_devices	= true;
-	req->ptrs.nr			= 0;
-	req->nr_effective		= 0;
-	req->have_cache			= false;
-	write_points_nr			= c->write_points_nr;
+	req->ptrs.nr		= 0;
+	req->nr_effective	= 0;
+	req->have_cache		= false;
+	write_points_nr		= c->write_points_nr;
 
 	*wp_ret = req->wp = writepoint_find(trans, write_point.v);
 
 	req->data_type = req->wp->data_type;
 
-	/* metadata may not allocate on cache devices: */
-	if (req->data_type != BCH_DATA_user)
-		req->have_cache = true;
-
 	ret = bch2_trans_relock(trans);
 	if (ret)
 		goto err;
 
-	while (1) {
-		req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target);
-
-		/* Don't allocate from devices we already have pointers to: */
-		darray_for_each(*req->devs_have, i)
-			__clear_bit(*i, req->devs_may_alloc.d);
-
-		open_bucket_for_each(c, &req->ptrs, ob, i)
-			__clear_bit(ob->dev, req->devs_may_alloc.d);
-
-		ret = bucket_alloc_set_writepoint(c, req) ?:
-		      bucket_alloc_set_partial(c, req) ?:
-		      (req->ec
-		       ? bucket_alloc_from_stripe(trans, req)
-		       : bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe));
+	/* metadata may not allocate on cache devices: */
+	if (req->data_type != BCH_DATA_user)
+		req->have_cache = true;
 
-		if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
-		    bch2_err_matches(ret, BCH_ERR_operation_blocked) ||
-		    bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
-			goto err;
-
-		if (ret == -BCH_ERR_freelist_empty ||
-		    ret == -BCH_ERR_insufficient_devices) {
-			if (req->will_retry_all_devices) {
-				BUG_ON(!req->will_retry_all_devices);
-				req->will_retry_all_devices = false;
-				/*
-				 * Only try to allocate cache (durability = 0 devices) from the
-				 * specified target:
-				 */
-				if (req->target &&
-				    (!(flags & BCH_WRITE_only_specified_devs) ||
-				     (ret == -BCH_ERR_insufficient_devices))) {
-					req->have_cache = true;
-					req->target = 0;
-				}
-				continue;
+	if (target && !(flags & BCH_WRITE_only_specified_devs)) {
+		ret = open_bucket_add_buckets(trans, req, NULL);
+		if (!ret ||
+		    bch2_err_matches(ret, BCH_ERR_transaction_restart))
+			goto alloc_done;
+
+		/* Don't retry from all devices if we're out of open buckets: */
+		if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
+			int ret2 = open_bucket_add_buckets(trans, req, cl);
+			if (!ret2 ||
+			    bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
+			    bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
+				ret = ret2;
+				goto alloc_done;
 			}
-
-			if (ret == -BCH_ERR_insufficient_devices &&
-			    req->nr_effective >= nr_replicas_required)
-				ret = 0;
-			else
-				goto err;
 		}
 
-		if (req->nr_effective < req->nr_replicas && req->ec) {
-			req->ec = false;
-			req->will_retry_target_devices = true;
-			req->will_retry_all_devices = true;
-			continue;
-		}
+		/*
+		 * Only try to allocate cache (durability = 0 devices) from the
+		 * specified target:
+		 */
+		req->have_cache	= true;
+		req->target	= 0;
 
-		BUG_ON(req->nr_effective < nr_replicas_required);
-		BUG_ON(ret < 0);
-		break;
+		ret = open_bucket_add_buckets(trans, req, cl);
+	} else {
+		ret = open_bucket_add_buckets(trans, req, cl);
 	}
+alloc_done:
+	BUG_ON(!ret && req->nr_effective < req->nr_replicas);
+
+	if (erasure_code && !ec_open_bucket(c, &req->ptrs))
+		pr_debug("failed to get ec bucket: ret %u", ret);
+
+	if (ret == -BCH_ERR_insufficient_devices &&
+	    req->nr_effective >= nr_replicas_required)
+		ret = 0;
+
+	if (ret)
+		goto err;
 
 	if (req->nr_effective > req->nr_replicas)
 		deallocate_extra_replicas(c, req);
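
The core behavioral change above is the retry pattern in __open_bucket_add_buckets(): allocation is first attempted nonblocking (cl == NULL) across every candidate device, and only after all devices have failed is the caller's closure passed down so the thread can register on the filesystem's waitlists. The standalone C sketch below models just that control flow; fake_dev, try_alloc and alloc_from_devs are illustrative stand-ins invented for this example, not bcachefs APIs, and a plain pointer stands in for struct closure.

/*
 * Minimal sketch of the "nonblocking first, then blocking" retry
 * pattern restored by this commit. All names here are hypothetical
 * stand-ins, not bcachefs code.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_DEVS 3

/* Pretend device: either has free buckets or is full. */
struct fake_dev {
	int idx;
	bool has_free;
};

/*
 * Nonblocking attempt (cl == NULL) fails immediately with -ENOSPC on a
 * full device. A blocking attempt would register the waiter on a
 * waitlist before failing; here we just report what would happen.
 */
static int try_alloc(struct fake_dev *d, void *cl)
{
	if (d->has_free)
		return 0;
	if (cl) {
		printf("dev %d full: registered waiter, would block\n", d->idx);
		return -EAGAIN;
	}
	return -ENOSPC;
}

static int alloc_from_devs(struct fake_dev *devs, void *_cl)
{
	void *cl = NULL;	/* first pass is always nonblocking */
	int ret = -ENOSPC;
retry_blocking:
	for (int i = 0; i < NR_DEVS; i++) {
		ret = try_alloc(&devs[i], cl);
		if (!ret) {
			printf("allocated from dev %d\n", devs[i].idx);
			return 0;
		}
	}
	/*
	 * Every device failed nonblocking: switch to the caller's
	 * closure and retry, mirroring the !cl && _cl check in
	 * __open_bucket_add_buckets() above.
	 */
	if (!cl && _cl) {
		cl = _cl;
		goto retry_blocking;
	}
	return ret;
}

int main(void)
{
	struct fake_dev devs[NR_DEVS] = {
		{ .idx = 0, .has_free = false },
		{ .idx = 1, .has_free = false },
		{ .idx = 2, .has_free = false },
	};
	int waiter;	/* stand-in for a struct closure */

	return alloc_from_devs(devs, &waiter) ? 1 : 0;
}

The point of the two-pass order is that one full device degrades into allocating from its peers rather than into a stall; blocking, and the latency of waiting on a closure, is reserved for the case where no device can satisfy the request at all.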