author     Kent Overstreet <kent.overstreet@gmail.com>   2017-03-19 15:56:34 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>   2017-03-19 17:31:47 -0800
commit     5ec39af8eaba49aee7bafa44c661da39e2f40dc3 (patch)
tree       1fb1a981602cbf22c7d2b2dba1168c715d7cecb5 /libbcachefs/tier.c
parent     bb1941de5378a7b8122d3575dcbc7d0aeb6326f0 (diff)
Rename from bcache-tools to bcachefs-tools
Diffstat (limited to 'libbcachefs/tier.c')
-rw-r--r--   libbcachefs/tier.c   282
1 files changed, 282 insertions, 0 deletions
diff --git a/libbcachefs/tier.c b/libbcachefs/tier.c
new file mode 100644
index 00000000..16d32928
--- /dev/null
+++ b/libbcachefs/tier.c
@@ -0,0 +1,282 @@
+
+#include "bcachefs.h"
+#include "alloc.h"
+#include "btree_iter.h"
+#include "buckets.h"
+#include "clock.h"
+#include "extents.h"
+#include "io.h"
+#include "keylist.h"
+#include "move.h"
+#include "super-io.h"
+#include "tier.h"
+
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <trace/events/bcachefs.h>
+
+struct tiering_state {
+	struct bch_tier		*tier;
+	unsigned		sectors;
+	unsigned		stripe_size;
+	unsigned		dev_idx;
+	struct bch_dev		*ca;
+};
+
+static bool tiering_pred(struct bch_fs *c,
+			 struct tiering_state *s,
+			 struct bkey_s_c k)
+{
+	if (bkey_extent_is_data(k.k)) {
+		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+		const struct bch_extent_ptr *ptr;
+		unsigned replicas = 0;
+
+		/* Make sure we have room to add a new pointer: */
+		if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
+		    BKEY_EXTENT_VAL_U64s_MAX)
+			return false;
+
+		extent_for_each_ptr(e, ptr)
+			if (c->devs[ptr->dev]->mi.tier >= s->tier->idx)
+				replicas++;
+
+		return replicas < c->opts.data_replicas;
+	}
+
+	return false;
+}
+
+static void tier_put_device(struct tiering_state *s)
+{
+	if (s->ca)
+		percpu_ref_put(&s->ca->io_ref);
+	s->ca = NULL;
+}
+
+/**
+ * refill_next - move on to refilling the next cache's tiering keylist
+ */
+static void tier_next_device(struct bch_fs *c, struct tiering_state *s)
+{
+	if (!s->ca || s->sectors > s->stripe_size) {
+		tier_put_device(s);
+		s->sectors = 0;
+		s->dev_idx++;
+
+		spin_lock(&s->tier->devs.lock);
+		if (s->dev_idx >= s->tier->devs.nr)
+			s->dev_idx = 0;
+
+		if (s->tier->devs.nr) {
+			s->ca = s->tier->devs.d[s->dev_idx].dev;
+			percpu_ref_get(&s->ca->io_ref);
+		}
+		spin_unlock(&s->tier->devs.lock);
+	}
+}
+
+static int issue_tiering_move(struct bch_fs *c,
+			      struct tiering_state *s,
+			      struct moving_context *ctxt,
+			      struct bkey_s_c k)
+{
+	int ret;
+
+	ret = bch2_data_move(c, ctxt, &s->ca->tiering_write_point, k, NULL);
+	if (!ret) {
+		trace_tiering_copy(k.k);
+		s->sectors += k.k->size;
+	} else {
+		trace_tiering_alloc_fail(c, k.k->size);
+	}
+
+	return ret;
+}
+
+/**
+ * tiering_next_cache - issue a move to write an extent to the next cache
+ * device in round robin order
+ */
+static s64 read_tiering(struct bch_fs *c, struct bch_tier *tier)
+{
+	struct moving_context ctxt;
+	struct tiering_state s;
+	struct btree_iter iter;
+	struct bkey_s_c k;
+	unsigned nr_devices = READ_ONCE(tier->devs.nr);
+	int ret;
+
+	if (!nr_devices)
+		return 0;
+
+	trace_tiering_start(c);
+
+	memset(&s, 0, sizeof(s));
+	s.tier		= tier;
+	s.stripe_size	= 2048; /* 1 mb for now */
+
+	bch2_move_ctxt_init(&ctxt, &tier->pd.rate,
+			    nr_devices * SECTORS_IN_FLIGHT_PER_DEVICE);
+	bch2_btree_iter_init(&iter, c, BTREE_ID_EXTENTS, POS_MIN);
+
+	while (!kthread_should_stop() &&
+	       !bch2_move_ctxt_wait(&ctxt) &&
+	       (k = bch2_btree_iter_peek(&iter)).k &&
+	       !btree_iter_err(k)) {
+		if (!tiering_pred(c, &s, k))
+			goto next;
+
+		tier_next_device(c, &s);
+		if (!s.ca)
+			break;
+
+		ret = issue_tiering_move(c, &s, &ctxt, k);
+		if (ret) {
+			bch2_btree_iter_unlock(&iter);
+
+			/* memory allocation failure, wait for some IO to finish */
+			bch2_move_ctxt_wait_for_io(&ctxt);
+			continue;
+		}
+next:
+		bch2_btree_iter_advance_pos(&iter);
+		//bch2_btree_iter_cond_resched(&iter);
+
+		/* unlock before calling moving_context_wait() */
+		bch2_btree_iter_unlock(&iter);
+		cond_resched();
+	}
+
+	bch2_btree_iter_unlock(&iter);
+	tier_put_device(&s);
+	bch2_move_ctxt_exit(&ctxt);
+	trace_tiering_end(c, ctxt.sectors_moved, ctxt.keys_moved);
+
+	return ctxt.sectors_moved;
+}
+
+static int bch2_tiering_thread(void *arg)
+{
+	struct bch_tier *tier = arg;
+	struct bch_fs *c = container_of(tier, struct bch_fs, tiers[tier->idx]);
+	struct io_clock *clock = &c->io_clock[WRITE];
+	struct bch_dev *ca;
+	u64 tier_capacity, available_sectors;
+	unsigned long last;
+	unsigned i;
+
+	set_freezable();
+
+	while (!kthread_should_stop()) {
+		if (kthread_wait_freezable(c->tiering_enabled &&
+					   tier->devs.nr))
+			break;
+
+		while (1) {
+			struct bch_tier *faster_tier;
+
+			last = atomic_long_read(&clock->now);
+
+			tier_capacity = available_sectors = 0;
+			for (faster_tier = c->tiers;
+			     faster_tier != tier;
+			     faster_tier++) {
+				spin_lock(&faster_tier->devs.lock);
+				group_for_each_dev(ca, &faster_tier->devs, i) {
+					tier_capacity +=
+						(ca->mi.nbuckets -
+						 ca->mi.first_bucket) << ca->bucket_bits;
+					available_sectors +=
+						dev_buckets_available(ca) << ca->bucket_bits;
+				}
+				spin_unlock(&faster_tier->devs.lock);
+			}
+
+			if (available_sectors < (tier_capacity >> 1))
+				break;
+
+			bch2_kthread_io_clock_wait(clock,
+						   last +
+						   available_sectors -
+						   (tier_capacity >> 1));
+			if (kthread_should_stop())
+				return 0;
+		}
+
+		read_tiering(c, tier);
+	}
+
+	return 0;
+}
+
+static void __bch2_tiering_stop(struct bch_tier *tier)
+{
+	tier->pd.rate.rate = UINT_MAX;
+	bch2_ratelimit_reset(&tier->pd.rate);
+
+	if (tier->migrate)
+		kthread_stop(tier->migrate);
+
+	tier->migrate = NULL;
+}
+
+void bch2_tiering_stop(struct bch_fs *c)
+{
+	struct bch_tier *tier;
+
+	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++)
+		__bch2_tiering_stop(tier);
+}
+
+static int __bch2_tiering_start(struct bch_tier *tier)
+{
+	if (!tier->migrate) {
+		struct task_struct *p =
+			kthread_create(bch2_tiering_thread, tier,
+				       "bch_tier[%u]", tier->idx);
+		if (IS_ERR(p))
+			return PTR_ERR(p);
+
+		tier->migrate = p;
+	}
+
+	wake_up_process(tier->migrate);
+	return 0;
+}
+
+int bch2_tiering_start(struct bch_fs *c)
+{
+	struct bch_tier *tier;
+	bool have_faster_tier = false;
+
+	if (c->opts.nochanges)
+		return 0;
+
+	for (tier = c->tiers; tier < c->tiers + ARRAY_SIZE(c->tiers); tier++) {
+		if (!tier->devs.nr)
+			continue;
+
+		if (have_faster_tier) {
+			int ret = __bch2_tiering_start(tier);
+			if (ret)
+				return ret;
+		} else {
+			__bch2_tiering_stop(tier);
+		}
+
+		have_faster_tier = true;
+	}
+
+	return 0;
+}
+
+void bch2_fs_tiering_init(struct bch_fs *c)
+{
+	unsigned i;
+
+	for (i = 0; i < ARRAY_SIZE(c->tiers); i++) {
+		c->tiers[i].idx = i;
+		bch2_pd_controller_init(&c->tiers[i].pd);
+	}
+}