diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2018-04-04 14:45:44 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@gmail.com> | 2018-04-06 16:36:33 -0400 |
commit | fc025752a95a767ec829f976a2d184acae1bbe61 (patch) | |
tree | 4844043f0c61c49daa2df9b9ba4d8f2562d66ed9 | |
parent | 7177d997313b96724e2d5e3b64d7f7a0c8ae18a9 (diff) |
bcachefs: rename prio -> last_io, refactor a bit
-rw-r--r-- | fs/bcachefs/alloc.c | 108 | ||||
-rw-r--r-- | fs/bcachefs/alloc_types.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/buckets.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/buckets_types.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/movinggc.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/sysfs.c | 69 |
9 files changed, 120 insertions, 102 deletions
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c index a9668a6d3dbc..bb33cad9ee7f 100644 --- a/fs/bcachefs/alloc.c +++ b/fs/bcachefs/alloc.c @@ -81,7 +81,7 @@ #include <linux/sort.h> #include <trace/events/bcachefs.h> -static void bch2_recalc_min_prio(struct bch_fs *, struct bch_dev *, int); +static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int); /* Ratelimiting/PD controllers */ @@ -238,9 +238,9 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) d = a.v->data; if (a.v->fields & (1 << BCH_ALLOC_FIELD_READ_TIME)) - g->prio[READ] = get_alloc_field(&d, 2); + g->io_time[READ] = get_alloc_field(&d, 2); if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME)) - g->prio[WRITE] = get_alloc_field(&d, 2); + g->io_time[WRITE] = get_alloc_field(&d, 2); lg_local_unlock(&c->usage_lock); } @@ -272,21 +272,21 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list) bch2_alloc_read_key(c, bkey_i_to_s_c(k)); } - mutex_lock(&c->prio_clock[READ].lock); + mutex_lock(&c->bucket_clock[READ].lock); for_each_member_device(ca, c, i) { down_read(&ca->bucket_lock); - bch2_recalc_min_prio(c, ca, READ); + bch2_recalc_oldest_io(c, ca, READ); up_read(&ca->bucket_lock); } - mutex_unlock(&c->prio_clock[READ].lock); + mutex_unlock(&c->bucket_clock[READ].lock); - mutex_lock(&c->prio_clock[WRITE].lock); + mutex_lock(&c->bucket_clock[WRITE].lock); for_each_member_device(ca, c, i) { down_read(&ca->bucket_lock); - bch2_recalc_min_prio(c, ca, WRITE); + bch2_recalc_oldest_io(c, ca, WRITE); up_read(&ca->bucket_lock); } - mutex_unlock(&c->prio_clock[WRITE].lock); + mutex_unlock(&c->bucket_clock[WRITE].lock); return 0; } @@ -322,9 +322,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca, d = a->v.data; if (a->v.fields & (1 << BCH_ALLOC_FIELD_READ_TIME)) - put_alloc_field(&d, 2, g->prio[READ]); + put_alloc_field(&d, 2, g->io_time[READ]); if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME)) - put_alloc_field(&d, 2, g->prio[WRITE]); + put_alloc_field(&d, 2, g->io_time[WRITE]); lg_local_unlock(&c->usage_lock); ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq, @@ -397,38 +397,34 @@ int bch2_alloc_write(struct bch_fs *c) /* Bucket IO clocks: */ -static void bch2_recalc_min_prio(struct bch_fs *c, struct bch_dev *ca, int rw) +static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw) { - struct prio_clock *clock = &c->prio_clock[rw]; + struct bucket_clock *clock = &c->bucket_clock[rw]; struct bucket_array *buckets = bucket_array(ca); struct bucket *g; - u16 max_delta = 1; + u16 max_last_io = 0; unsigned i; - lockdep_assert_held(&c->prio_clock[rw].lock); + lockdep_assert_held(&c->bucket_clock[rw].lock); - /* Determine min prio for this particular device */ + /* Recalculate max_last_io for this device: */ for_each_bucket(g, buckets) - max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw])); + max_last_io = max(max_last_io, bucket_last_io(c, g, rw)); - ca->min_prio[rw] = clock->hand - max_delta; + ca->max_last_bucket_io[rw] = max_last_io; - /* - * This may possibly increase the min prio for the whole device, check - * that as well. - */ - max_delta = 1; + /* Recalculate global max_last_io: */ + max_last_io = 0; for_each_member_device(ca, c, i) - max_delta = max(max_delta, - (u16) (clock->hand - ca->min_prio[rw])); + max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]); - clock->min_prio = clock->hand - max_delta; + clock->max_last_io = max_last_io; } -static void bch2_rescale_prios(struct bch_fs *c, int rw) +static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw) { - struct prio_clock *clock = &c->prio_clock[rw]; + struct bucket_clock *clock = &c->bucket_clock[rw]; struct bucket_array *buckets; struct bch_dev *ca; struct bucket *g; @@ -441,10 +437,10 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw) buckets = bucket_array(ca); for_each_bucket(g, buckets) - g->prio[rw] = clock->hand - - (clock->hand - g->prio[rw]) / 2; + g->io_time[rw] = clock->hand - + bucket_last_io(c, g, rw) / 2; - bch2_recalc_min_prio(c, ca, rw); + bch2_recalc_oldest_io(c, ca, rw); up_read(&ca->bucket_lock); } @@ -452,19 +448,26 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw) static void bch2_inc_clock_hand(struct io_timer *timer) { - struct prio_clock *clock = container_of(timer, - struct prio_clock, rescale); + struct bucket_clock *clock = container_of(timer, + struct bucket_clock, rescale); struct bch_fs *c = container_of(clock, - struct bch_fs, prio_clock[clock->rw]); + struct bch_fs, bucket_clock[clock->rw]); + struct bch_dev *ca; u64 capacity; + unsigned i; mutex_lock(&clock->lock); - clock->hand++; - /* if clock cannot be advanced more, rescale prio */ - if (clock->hand == (u16) (clock->min_prio - 1)) - bch2_rescale_prios(c, clock->rw); + if (clock->max_last_io >= U16_MAX - 2) + bch2_rescale_bucket_io_times(c, clock->rw); + + BUG_ON(clock->max_last_io >= U16_MAX - 2); + + for_each_member_device(ca, c, i) + ca->max_last_bucket_io[clock->rw]++; + clock->max_last_io++; + clock->hand++; mutex_unlock(&clock->lock); @@ -486,9 +489,9 @@ static void bch2_inc_clock_hand(struct io_timer *timer) bch2_io_timer_add(&c->io_clock[clock->rw], timer); } -static void bch2_prio_timer_init(struct bch_fs *c, int rw) +static void bch2_bucket_clock_init(struct bch_fs *c, int rw) { - struct prio_clock *clock = &c->prio_clock[rw]; + struct bucket_clock *clock = &c->bucket_clock[rw]; clock->hand = 1; clock->rw = rw; @@ -637,13 +640,14 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca, size_t b, struct bucket_mark m) { + unsigned last_io = bucket_last_io(c, bucket(ca, b), READ); + unsigned max_last_io = ca->max_last_bucket_io[READ]; + /* * Time since last read, scaled to [0, 8) where larger value indicates * more recently read data: */ - unsigned long hotness = - (bucket(ca, b)->prio[READ] - ca->min_prio[READ]) * 7 / - (c->prio_clock[READ].hand - ca->min_prio[READ]); + unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io; /* How much we want to keep the data in this bucket: */ unsigned long data_wantness = @@ -674,12 +678,12 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca) ca->alloc_heap.used = 0; - mutex_lock(&c->prio_clock[READ].lock); + mutex_lock(&c->bucket_clock[READ].lock); down_read(&ca->bucket_lock); buckets = bucket_array(ca); - bch2_recalc_min_prio(c, ca, READ); + bch2_recalc_oldest_io(c, ca, READ); /* * Find buckets with lowest read priority, by building a maxheap sorted @@ -713,7 +717,7 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca) heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp); up_read(&ca->bucket_lock); - mutex_unlock(&c->prio_clock[READ].lock); + mutex_unlock(&c->bucket_clock[READ].lock); heap_resort(&ca->alloc_heap, bucket_alloc_cmp); @@ -1818,14 +1822,14 @@ void bch2_recalc_capacity(struct bch_fs *c) if (c->capacity) { bch2_io_timer_add(&c->io_clock[READ], - &c->prio_clock[READ].rescale); + &c->bucket_clock[READ].rescale); bch2_io_timer_add(&c->io_clock[WRITE], - &c->prio_clock[WRITE].rescale); + &c->bucket_clock[WRITE].rescale); } else { bch2_io_timer_del(&c->io_clock[READ], - &c->prio_clock[READ].rescale); + &c->bucket_clock[READ].rescale); bch2_io_timer_del(&c->io_clock[WRITE], - &c->prio_clock[WRITE].rescale); + &c->bucket_clock[WRITE].rescale); } /* Wake up case someone was waiting for buckets */ @@ -2191,8 +2195,8 @@ void bch2_fs_allocator_init(struct bch_fs *c) mutex_init(&c->write_points_hash_lock); spin_lock_init(&c->freelist_lock); - bch2_prio_timer_init(c, READ); - bch2_prio_timer_init(c, WRITE); + bch2_bucket_clock_init(c, READ); + bch2_bucket_clock_init(c, WRITE); /* open bucket 0 is a sentinal NULL: */ spin_lock_init(&c->open_buckets[0].lock); diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index bee1e5a35778..8a71a37637de 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -8,7 +8,7 @@ #include "fifo.h" /* There's two of these clocks, one for reads and one for writes: */ -struct prio_clock { +struct bucket_clock { /* * "now" in (read/write) IO time - incremented whenever we do X amount * of reads or writes. @@ -23,7 +23,7 @@ struct prio_clock { * consistent. */ u16 hand; - u16 min_prio; + u16 max_last_io; int rw; diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 0cda0975301e..9c4e7fdecbc7 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -392,7 +392,7 @@ struct bch_dev { size_t fifo_last_bucket; /* last calculated minimum prio */ - u16 min_prio[2]; + u16 max_last_bucket_io[2]; atomic_long_t saturated_count; size_t inc_gen_needs_gc; @@ -596,7 +596,7 @@ struct bch_fs { * those together consistently we keep track of the smallest nonzero * priority of any bucket. */ - struct prio_clock prio_clock[2]; + struct bucket_clock bucket_clock[2]; struct io_clock io_clock[2]; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index fda7fd704ae8..8dbd9152aec8 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -47,7 +47,12 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b) static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca, size_t b, int rw) { - bucket(ca, b)->prio[rw] = c->prio_clock[rw].hand; + bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand; +} + +static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw) +{ + return c->bucket_clock[rw].hand - g->io_time[rw]; } /* diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h index a0256e13618c..28bd2c596477 100644 --- a/fs/bcachefs/buckets_types.h +++ b/fs/bcachefs/buckets_types.h @@ -31,12 +31,12 @@ struct bucket_mark { }; struct bucket { - u16 prio[2]; - union { struct bucket_mark _mark; const struct bucket_mark mark; }; + + u16 io_time[2]; }; struct bucket_array { @@ -85,8 +85,9 @@ struct disk_reservation { }; struct copygc_heap_entry { + u8 gen; + u32 sectors; u64 offset; - struct bucket_mark mark; }; typedef HEAP(struct copygc_heap_entry) copygc_heap; diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index ebbed8567c1f..9d19031ccfa9 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -2321,8 +2321,8 @@ static void journal_write(struct closure *cl) journal_write_compact(jset); - jset->read_clock = cpu_to_le16(c->prio_clock[READ].hand); - jset->write_clock = cpu_to_le16(c->prio_clock[WRITE].hand); + jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); + jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); jset->magic = cpu_to_le64(jset_magic(c)); jset->version = cpu_to_le32(BCACHE_JSET_VERSION); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 75113d6c6647..28dabca74565 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -52,7 +52,7 @@ static inline int sectors_used_cmp(copygc_heap *heap, struct copygc_heap_entry l, struct copygc_heap_entry r) { - return bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark); + return (l.sectors > r.sectors) - (l.sectors < r.sectors); } static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) @@ -79,7 +79,7 @@ static bool __copygc_pred(struct bch_dev *ca, return (i >= 0 && ptr->offset < h->data[i].offset + ca->mi.bucket_size && - ptr->gen == h->data[i].mark.gen); + ptr->gen == h->data[i].gen); } return false; @@ -155,8 +155,9 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca) continue; e = (struct copygc_heap_entry) { - .offset = bucket_to_sector(ca, b), - .mark = m + .gen = m.gen, + .sectors = bucket_sectors_used(m), + .offset = bucket_to_sector(ca, b), }; heap_add_or_replace(h, e, -sectors_used_cmp); } @@ -164,11 +165,11 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca) up_read(&c->gc_lock); for (i = h->data; i < h->data + h->used; i++) - sectors_to_move += bucket_sectors_used(i->mark); + sectors_to_move += i->sectors; while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) { BUG_ON(!heap_pop(h, e, -sectors_used_cmp)); - sectors_to_move -= bucket_sectors_used(e.mark); + sectors_to_move -= e.sectors; } buckets_to_move = h->used; @@ -192,7 +193,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca) size_t b = sector_to_bucket(ca, i->offset); struct bucket_mark m = READ_ONCE(buckets->b[b].mark); - if (i->mark.gen == m.gen && bucket_sectors_used(m)) { + if (i->gen == m.gen && bucket_sectors_used(m)) { sectors_not_moved += bucket_sectors_used(m); buckets_not_moved++; } diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index f88a313f72ae..92cf630fcf8b 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -726,8 +726,8 @@ const char *bch2_fs_start(struct bch_fs *c) j = &list_entry(journal.prev, struct journal_replay, list)->j; - c->prio_clock[READ].hand = le16_to_cpu(j->read_clock); - c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock); + c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock); + c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock); for (i = 0; i < BTREE_ID_NR; i++) { unsigned level; diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index c440eca4fb87..f796f7729a80 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -142,10 +142,10 @@ read_attribute(first_bucket); read_attribute(nbuckets); read_attribute(durability); read_attribute(iostats); -read_attribute(read_priority_stats); -read_attribute(write_priority_stats); -read_attribute(fragmentation_stats); -read_attribute(oldest_gen_stats); +read_attribute(last_read_quantiles); +read_attribute(last_write_quantiles); +read_attribute(fragmentation_quantiles); +read_attribute(oldest_gen_quantiles); read_attribute(reserve_stats); read_attribute(btree_cache_size); read_attribute(compression_stats); @@ -623,36 +623,41 @@ struct attribute *bch2_fs_time_stats_files[] = { NULL }; -typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *); +typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *, + size_t, void *); -static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b, - void *private) +static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca, + size_t b, void *private) { - struct bucket *g = bucket(ca, b); int rw = (private ? 1 : 0); - return ca->fs->prio_clock[rw].hand - g->prio[rw]; + return bucket_last_io(c, bucket(ca, b), rw); } -static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b, - void *private) +static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca, + size_t b, void *private) { struct bucket *g = bucket(ca, b); return bucket_sectors_used(g->mark); } -static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b, - void *private) +static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca, + size_t b, void *private) { return bucket_gc_gen(ca, b); } -static ssize_t show_quantiles(struct bch_dev *ca, char *buf, - bucket_map_fn *fn, void *private) +static int unsigned_cmp(const void *_l, const void *_r) { - int cmp(const void *l, const void *r) - { return *((unsigned *) r) - *((unsigned *) l); } + unsigned l = *((unsigned *) _l); + unsigned r = *((unsigned *) _r); + + return (l > r) - (l < r); +} +static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca, + char *buf, bucket_map_fn *fn, void *private) +{ size_t i, n; /* Compute 31 quantiles */ unsigned q[31], *p; @@ -668,9 +673,9 @@ static ssize_t show_quantiles(struct bch_dev *ca, char *buf, } for (i = ca->mi.first_bucket; i < n; i++) - p[i] = fn(ca, i, private); + p[i] = fn(c, ca, i, private); - sort(p, n, sizeof(unsigned), cmp, NULL); + sort(p, n, sizeof(unsigned), unsigned_cmp, NULL); up_read(&ca->bucket_lock); while (n && @@ -854,14 +859,16 @@ SHOW(bch2_dev) if (attr == &sysfs_iostats) return show_dev_iostats(ca, buf); - if (attr == &sysfs_read_priority_stats) - return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0); - if (attr == &sysfs_write_priority_stats) - return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1); - if (attr == &sysfs_fragmentation_stats) - return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL); - if (attr == &sysfs_oldest_gen_stats) - return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL); + + if (attr == &sysfs_last_read_quantiles) + return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0); + if (attr == &sysfs_last_write_quantiles) + return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1); + if (attr == &sysfs_fragmentation_quantiles) + return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL); + if (attr == &sysfs_oldest_gen_quantiles) + return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL); + if (attr == &sysfs_reserve_stats) return show_reserve_stats(ca, buf); if (attr == &sysfs_alloc_debug) @@ -946,10 +953,10 @@ struct attribute *bch2_dev_files[] = { &sysfs_iostats, /* alloc info - other stats: */ - &sysfs_read_priority_stats, - &sysfs_write_priority_stats, - &sysfs_fragmentation_stats, - &sysfs_oldest_gen_stats, + &sysfs_last_read_quantiles, + &sysfs_last_write_quantiles, + &sysfs_fragmentation_quantiles, + &sysfs_oldest_gen_quantiles, &sysfs_reserve_stats, /* debug: */ |