diff options
Diffstat (limited to 'fs/bcachefs/alloc_foreground.c')
-rw-r--r-- | fs/bcachefs/alloc_foreground.c | 299 |
1 files changed, 126 insertions, 173 deletions
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 23a9fbb36f49..df7a28cd8491 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -206,8 +206,7 @@ static inline bool may_alloc_bucket(struct bch_fs *c, static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, struct alloc_request *req, - u64 bucket, u8 gen, - struct closure *cl) + u64 bucket, u8 gen) { struct bch_dev *ca = req->ca; @@ -222,12 +221,18 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, spin_lock(&c->freelist_lock); if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(req->watermark))) { - if (cl) - closure_wait(&c->open_buckets_wait, cl); - track_event_change(&c->times[BCH_TIME_blocked_allocate_open_bucket], true); + + int ret; + if (req->cl && !(req->flags & BCH_WRITE_alloc_nowait)) { + closure_wait(&c->open_buckets_wait, req->cl); + ret = bch_err_throw(c, open_bucket_alloc_blocked); + } else { + ret = bch_err_throw(c, open_buckets_empty); + } + spin_unlock(&c->freelist_lock); - return ERR_PTR(bch_err_throw(c, open_buckets_empty)); + return ERR_PTR(ret); } /* Recheck under lock: */ @@ -259,8 +264,7 @@ static struct open_bucket *__try_alloc_bucket(struct bch_fs *c, static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct alloc_request *req, - struct btree_iter *freespace_iter, - struct closure *cl) + struct btree_iter *freespace_iter) { struct bch_fs *c = trans->c; u64 b = freespace_iter->pos.offset & ~(~0ULL << 56); @@ -275,7 +279,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, if (ret) return NULL; - return __try_alloc_bucket(c, req, b, gen, cl); + return __try_alloc_bucket(c, req, b, gen); } /* @@ -283,8 +287,7 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, */ static noinline struct open_bucket * bch2_bucket_alloc_early(struct btree_trans *trans, - struct alloc_request *req, - struct closure *cl) + struct alloc_request *req) { struct bch_fs *c = trans->c; struct bch_dev *ca = req->ca; @@ -348,7 +351,7 @@ again: req->counters.buckets_seen++; ob = may_alloc_bucket(c, req, k.k->p) - ? __try_alloc_bucket(c, req, k.k->p.offset, a->gen, cl) + ? __try_alloc_bucket(c, req, k.k->p.offset, a->gen) : NULL; next: bch2_set_btree_iter_dontneed(trans, &citer); @@ -374,8 +377,7 @@ next: } static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans, - struct alloc_request *req, - struct closure *cl) + struct alloc_request *req) { struct bch_dev *ca = req->ca; struct btree_iter iter; @@ -417,7 +419,7 @@ again: goto next; } - ob = try_alloc_bucket(trans, req, &iter, cl); + ob = try_alloc_bucket(trans, req, &iter); if (ob) { if (!IS_ERR(ob)) *dev_alloc_cursor = iter.pos.offset; @@ -450,7 +452,6 @@ fail: static noinline void trace_bucket_alloc2(struct bch_fs *c, struct alloc_request *req, - struct closure *cl, struct open_bucket *ob) { struct printbuf buf = PRINTBUF; @@ -460,7 +461,8 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, prt_printf(&buf, "dev\t%s (%u)\n", req->ca->name, req->ca->dev_idx); prt_printf(&buf, "watermark\t%s\n", bch2_watermarks[req->watermark]); prt_printf(&buf, "data type\t%s\n", __bch2_data_types[req->data_type]); - prt_printf(&buf, "blocking\t%u\n", cl != NULL); + prt_printf(&buf, "blocking\t%u\n", !req->will_retry_target_devices && + !req->will_retry_all_devices); prt_printf(&buf, "free\t%llu\n", req->usage.buckets[BCH_DATA_free]); prt_printf(&buf, "avail\t%llu\n", dev_buckets_free(req->ca, req->usage, req->watermark)); prt_printf(&buf, "copygc_wait\t%llu/%lli\n", @@ -488,28 +490,23 @@ static noinline void trace_bucket_alloc2(struct bch_fs *c, * bch2_bucket_alloc_trans - allocate a single bucket from a specific device * @trans: transaction object * @req: state for the entire allocation - * @cl: if not NULL, closure to be used to wait if buckets not available - * @nowait: if true, do not wait for buckets to become available * * Returns: an open_bucket on success, or an ERR_PTR() on failure. */ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, - struct alloc_request *req, - struct closure *cl, - bool nowait) + struct alloc_request *req) { struct bch_fs *c = trans->c; struct bch_dev *ca = req->ca; struct open_bucket *ob = NULL; bool freespace = READ_ONCE(ca->mi.freespace_initialized); - u64 avail; - bool waiting = nowait; + bool waiting = false; req->btree_bitmap = req->data_type == BCH_DATA_btree; memset(&req->counters, 0, sizeof(req->counters)); again: bch2_dev_usage_read_fast(ca, &req->usage); - avail = dev_buckets_free(ca, req->usage, req->watermark); + u64 avail = dev_buckets_free(ca, req->usage, req->watermark); if (req->usage.buckets[BCH_DATA_need_discard] > avail) bch2_dev_do_discards(ca); @@ -525,8 +522,12 @@ again: c->recovery.pass_done < BCH_RECOVERY_PASS_check_allocations) goto alloc; - if (cl && !waiting) { - closure_wait(&c->freelist_wait, cl); + if (!waiting && + req->cl && + !req->will_retry_target_devices && + !req->will_retry_all_devices && + !(req->flags & BCH_WRITE_alloc_nowait)) { + closure_wait(&c->freelist_wait, req->cl); waiting = true; goto again; } @@ -541,8 +542,8 @@ again: closure_wake_up(&c->freelist_wait); alloc: ob = likely(freespace) - ? bch2_bucket_alloc_freelist(trans, req, cl) - : bch2_bucket_alloc_early(trans, req, cl); + ? bch2_bucket_alloc_freelist(trans, req) + : bch2_bucket_alloc_early(trans, req); if (req->counters.need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); @@ -571,7 +572,7 @@ err: if (!IS_ERR(ob) ? trace_bucket_alloc_enabled() : trace_bucket_alloc_fail_enabled()) - trace_bucket_alloc2(c, req, cl, ob); + trace_bucket_alloc2(c, req, ob); return ob; } @@ -583,13 +584,14 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca, { struct open_bucket *ob; struct alloc_request req = { + .cl = cl, .watermark = watermark, .data_type = data_type, .ca = ca, }; bch2_trans_do(c, - PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req, cl, false))); + PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, &req))); return ob; } @@ -703,18 +705,24 @@ static int add_new_bucket(struct bch_fs *c, return 0; } -inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans, - struct alloc_request *req, - struct dev_stripe_state *stripe, - struct closure *cl) +int bch2_bucket_alloc_set_trans(struct btree_trans *trans, + struct alloc_request *req, + struct dev_stripe_state *stripe) { struct bch_fs *c = trans->c; int ret = 0; BUG_ON(req->nr_effective >= req->nr_replicas); + /* + * Try nonblocking first, so that if one device is full we'll try from + * other devices: + */ +retry_blocking: bch2_dev_alloc_list(c, stripe, &req->devs_may_alloc, &req->devs_sorted); + req->will_retry_target_devices = req->devs_sorted.nr > 1; + darray_for_each(req->devs_sorted, i) { req->ca = bch2_dev_tryget_noerror(c, *i); if (!req->ca) @@ -725,29 +733,31 @@ inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans, continue; } - struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req, cl, - req->flags & BCH_WRITE_alloc_nowait); + struct open_bucket *ob = bch2_bucket_alloc_trans(trans, req); if (!IS_ERR(ob)) bch2_dev_stripe_increment_inlined(req->ca, stripe, &req->usage); bch2_dev_put(req->ca); - if (IS_ERR(ob)) { + if (IS_ERR(ob)) { /* don't squash error */ ret = PTR_ERR(ob); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl) - break; + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_operation_blocked) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + return ret; continue; } - ret = add_new_bucket(c, req, ob); - if (ret) - break; + if (add_new_bucket(c, req, ob)) + return 0; } - if (ret == 1) - return 0; - if (ret) - return ret; - return bch_err_throw(c, insufficient_devices); + if (bch2_err_matches(ret, BCH_ERR_freelist_empty) && + req->will_retry_target_devices) { + req->will_retry_target_devices = false; + goto retry_blocking; + } + + return ret ?: bch_err_throw(c, insufficient_devices); } /* Allocate from stripes: */ @@ -759,20 +769,13 @@ inline int bch2_bucket_alloc_set_trans(struct btree_trans *trans, */ static int bucket_alloc_from_stripe(struct btree_trans *trans, - struct alloc_request *req, - struct closure *cl) + struct alloc_request *req) { struct bch_fs *c = trans->c; int ret = 0; - if (req->nr_replicas < 2) - return 0; - - if (ec_open_bucket(c, &req->ptrs)) - return 0; - struct ec_stripe_head *h = - bch2_ec_stripe_head_get(trans, req, 0, cl); + bch2_ec_stripe_head_get(trans, req, 0); if (IS_ERR(h)) return PTR_ERR(h); if (!h) @@ -887,79 +890,6 @@ unlock: return ret; } -static int __open_bucket_add_buckets(struct btree_trans *trans, - struct alloc_request *req, - struct closure *_cl) -{ - struct bch_fs *c = trans->c; - struct open_bucket *ob; - struct closure *cl = NULL; - unsigned i; - int ret; - - req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target); - - /* Don't allocate from devices we already have pointers to: */ - darray_for_each(*req->devs_have, i) - __clear_bit(*i, req->devs_may_alloc.d); - - open_bucket_for_each(c, &req->ptrs, ob, i) - __clear_bit(ob->dev, req->devs_may_alloc.d); - - ret = bucket_alloc_set_writepoint(c, req); - if (ret) - return ret; - - ret = bucket_alloc_set_partial(c, req); - if (ret) - return ret; - - if (req->ec) { - ret = bucket_alloc_from_stripe(trans, req, _cl); - } else { -retry_blocking: - /* - * Try nonblocking first, so that if one device is full we'll try from - * other devices: - */ - ret = bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe, cl); - if (ret && - !bch2_err_matches(ret, BCH_ERR_transaction_restart) && - !bch2_err_matches(ret, BCH_ERR_insufficient_devices) && - !cl && _cl) { - cl = _cl; - goto retry_blocking; - } - } - - return ret; -} - -static int open_bucket_add_buckets(struct btree_trans *trans, - struct alloc_request *req, - struct closure *cl) -{ - int ret; - - if (req->ec && !ec_open_bucket(trans->c, &req->ptrs)) { - ret = __open_bucket_add_buckets(trans, req, cl); - if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || - bch2_err_matches(ret, BCH_ERR_operation_blocked) || - bch2_err_matches(ret, BCH_ERR_freelist_empty) || - bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) - return ret; - if (req->nr_effective >= req->nr_replicas) - return 0; - } - - bool ec = false; - swap(ec, req->ec); - ret = __open_bucket_add_buckets(trans, req, cl); - swap(ec, req->ec); - - return ret < 0 ? ret : 0; -} - /** * should_drop_bucket - check if this is open_bucket should go away * @ob: open_bucket to predicate on @@ -1255,72 +1185,95 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans, if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING)) erasure_code = false; + if (nr_replicas < 2) + erasure_code = false; + + req->cl = cl; req->nr_replicas = nr_replicas; req->target = target; - req->ec = erasure_code; req->watermark = watermark; req->flags = flags; req->devs_have = devs_have; BUG_ON(!nr_replicas || !nr_replicas_required); retry: - req->ptrs.nr = 0; - req->nr_effective = 0; - req->have_cache = false; - write_points_nr = c->write_points_nr; + req->ec = erasure_code; + req->will_retry_target_devices = true; + req->will_retry_all_devices = true; + req->ptrs.nr = 0; + req->nr_effective = 0; + req->have_cache = false; + write_points_nr = c->write_points_nr; *wp_ret = req->wp = writepoint_find(trans, write_point.v); req->data_type = req->wp->data_type; + /* metadata may not allocate on cache devices: */ + if (req->data_type != BCH_DATA_user) + req->have_cache = true; + ret = bch2_trans_relock(trans); if (ret) goto err; - /* metadata may not allocate on cache devices: */ - if (req->data_type != BCH_DATA_user) - req->have_cache = true; + while (1) { + req->devs_may_alloc = target_rw_devs(c, req->wp->data_type, req->target); - if (target && !(flags & BCH_WRITE_only_specified_devs)) { - ret = open_bucket_add_buckets(trans, req, NULL); - if (!ret || - bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto alloc_done; - - /* Don't retry from all devices if we're out of open buckets: */ - if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) { - int ret2 = open_bucket_add_buckets(trans, req, cl); - if (!ret2 || - bch2_err_matches(ret2, BCH_ERR_transaction_restart) || - bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) { - ret = ret2; - goto alloc_done; - } - } + /* Don't allocate from devices we already have pointers to: */ + darray_for_each(*req->devs_have, i) + __clear_bit(*i, req->devs_may_alloc.d); - /* - * Only try to allocate cache (durability = 0 devices) from the - * specified target: - */ - req->have_cache = true; - req->target = 0; + open_bucket_for_each(c, &req->ptrs, ob, i) + __clear_bit(ob->dev, req->devs_may_alloc.d); - ret = open_bucket_add_buckets(trans, req, cl); - } else { - ret = open_bucket_add_buckets(trans, req, cl); - } -alloc_done: - BUG_ON(!ret && req->nr_effective < req->nr_replicas); + ret = bucket_alloc_set_writepoint(c, req) ?: + bucket_alloc_set_partial(c, req) ?: + (req->ec + ? bucket_alloc_from_stripe(trans, req) + : bch2_bucket_alloc_set_trans(trans, req, &req->wp->stripe)); + + if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || + bch2_err_matches(ret, BCH_ERR_operation_blocked) || + bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) + goto err; + + if (ret == -BCH_ERR_freelist_empty || + ret == -BCH_ERR_insufficient_devices) { + if (req->will_retry_all_devices) { + BUG_ON(!req->will_retry_all_devices); + req->will_retry_all_devices = false; + /* + * Only try to allocate cache (durability = 0 devices) from the + * specified target: + */ + if (req->target && + (!(flags & BCH_WRITE_only_specified_devs) || + (ret == -BCH_ERR_insufficient_devices))) { + req->have_cache = true; + req->target = 0; + } + continue; + } - if (erasure_code && !ec_open_bucket(c, &req->ptrs)) - pr_debug("failed to get ec bucket: ret %u", ret); + if (ret == -BCH_ERR_insufficient_devices && + req->nr_effective >= nr_replicas_required) + ret = 0; + else + goto err; + } - if (ret == -BCH_ERR_insufficient_devices && - req->nr_effective >= nr_replicas_required) - ret = 0; + if (req->nr_effective < req->nr_replicas && req->ec) { + req->ec = false; + req->will_retry_target_devices = true; + req->will_retry_all_devices = true; + continue; + } - if (ret) - goto err; + BUG_ON(req->nr_effective < nr_replicas_required); + BUG_ON(ret < 0); + break; + } if (req->nr_effective > req->nr_replicas) deallocate_extra_replicas(c, req); |