author    Kent Overstreet <kent.overstreet@gmail.com>    2018-04-04 14:45:44 -0400
committer Kent Overstreet <kent.overstreet@gmail.com>    2018-04-06 16:36:33 -0400
commit    fc025752a95a767ec829f976a2d184acae1bbe61
tree      4844043f0c61c49daa2df9b9ba4d8f2562d66ed9
parent    7177d997313b96724e2d5e3b64d7f7a0c8ae18a9
bcachefs: rename prio -> last_io, refactor a bit
-rw-r--r--   fs/bcachefs/alloc.c          108
-rw-r--r--   fs/bcachefs/alloc_types.h      4
-rw-r--r--   fs/bcachefs/bcachefs.h         4
-rw-r--r--   fs/bcachefs/buckets.h          7
-rw-r--r--   fs/bcachefs/buckets_types.h    7
-rw-r--r--   fs/bcachefs/journal.c          4
-rw-r--r--   fs/bcachefs/movinggc.c        15
-rw-r--r--   fs/bcachefs/super.c            4
-rw-r--r--   fs/bcachefs/sysfs.c           69
9 files changed, 120 insertions(+), 102 deletions(-)
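
Note: the rename is semantic, not just cosmetic. The per-bucket field
(prio[], now io_time[]) still stores the clock hand's value at the last
access, but every consumer now derives an age from it, and the cached
extremum flips from the minimum stored value (min_prio) to the maximum
derived age (max_last_io). A minimal sketch of the equivalence, using
names from this patch:

    /* before: age computed inline from the stored clock value */
    u16 age_before = c->prio_clock[rw].hand - g->prio[rw];
    /* after: the same quantity, behind the new buckets.h helper */
    u16 age_after  = bucket_last_io(c, g, rw);  /* == hand - io_time[rw] */
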
diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index a9668a6d3dbc..bb33cad9ee7f 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -81,7 +81,7 @@
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
-static void bch2_recalc_min_prio(struct bch_fs *, struct bch_dev *, int);
+static void bch2_recalc_oldest_io(struct bch_fs *, struct bch_dev *, int);
/* Ratelimiting/PD controllers */
@@ -238,9 +238,9 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k)
d = a.v->data;
if (a.v->fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
- g->prio[READ] = get_alloc_field(&d, 2);
+ g->io_time[READ] = get_alloc_field(&d, 2);
if (a.v->fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
- g->prio[WRITE] = get_alloc_field(&d, 2);
+ g->io_time[WRITE] = get_alloc_field(&d, 2);
lg_local_unlock(&c->usage_lock);
}
@@ -272,21 +272,21 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
bch2_alloc_read_key(c, bkey_i_to_s_c(k));
}
- mutex_lock(&c->prio_clock[READ].lock);
+ mutex_lock(&c->bucket_clock[READ].lock);
for_each_member_device(ca, c, i) {
down_read(&ca->bucket_lock);
- bch2_recalc_min_prio(c, ca, READ);
+ bch2_recalc_oldest_io(c, ca, READ);
up_read(&ca->bucket_lock);
}
- mutex_unlock(&c->prio_clock[READ].lock);
+ mutex_unlock(&c->bucket_clock[READ].lock);
- mutex_lock(&c->prio_clock[WRITE].lock);
+ mutex_lock(&c->bucket_clock[WRITE].lock);
for_each_member_device(ca, c, i) {
down_read(&ca->bucket_lock);
- bch2_recalc_min_prio(c, ca, WRITE);
+ bch2_recalc_oldest_io(c, ca, WRITE);
up_read(&ca->bucket_lock);
}
- mutex_unlock(&c->prio_clock[WRITE].lock);
+ mutex_unlock(&c->bucket_clock[WRITE].lock);
return 0;
}
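
Note: the read path takes the bucket_clock mutex outside each device's
bucket_lock — the ordering the lockdep_assert_held() in
bch2_recalc_oldest_io() depends on. Condensed from the hunk above:

    mutex_lock(&c->bucket_clock[rw].lock);   /* outer: clock hand held still */
    down_read(&ca->bucket_lock);             /* inner: bucket array stable   */
    bch2_recalc_oldest_io(c, ca, rw);
    up_read(&ca->bucket_lock);
    mutex_unlock(&c->bucket_clock[rw].lock);
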
@@ -322,9 +322,9 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
d = a->v.data;
if (a->v.fields & (1 << BCH_ALLOC_FIELD_READ_TIME))
- put_alloc_field(&d, 2, g->prio[READ]);
+ put_alloc_field(&d, 2, g->io_time[READ]);
if (a->v.fields & (1 << BCH_ALLOC_FIELD_WRITE_TIME))
- put_alloc_field(&d, 2, g->prio[WRITE]);
+ put_alloc_field(&d, 2, g->io_time[WRITE]);
lg_local_unlock(&c->usage_lock);
ret = bch2_btree_insert_at(c, NULL, NULL, journal_seq,
@@ -397,38 +397,34 @@ int bch2_alloc_write(struct bch_fs *c)
/* Bucket IO clocks: */
-static void bch2_recalc_min_prio(struct bch_fs *c, struct bch_dev *ca, int rw)
+static void bch2_recalc_oldest_io(struct bch_fs *c, struct bch_dev *ca, int rw)
{
- struct prio_clock *clock = &c->prio_clock[rw];
+ struct bucket_clock *clock = &c->bucket_clock[rw];
struct bucket_array *buckets = bucket_array(ca);
struct bucket *g;
- u16 max_delta = 1;
+ u16 max_last_io = 0;
unsigned i;
- lockdep_assert_held(&c->prio_clock[rw].lock);
+ lockdep_assert_held(&c->bucket_clock[rw].lock);
- /* Determine min prio for this particular device */
+ /* Recalculate max_last_io for this device: */
for_each_bucket(g, buckets)
- max_delta = max(max_delta, (u16) (clock->hand - g->prio[rw]));
+ max_last_io = max(max_last_io, bucket_last_io(c, g, rw));
- ca->min_prio[rw] = clock->hand - max_delta;
+ ca->max_last_bucket_io[rw] = max_last_io;
- /*
- * This may possibly increase the min prio for the whole device, check
- * that as well.
- */
- max_delta = 1;
+ /* Recalculate global max_last_io: */
+ max_last_io = 0;
for_each_member_device(ca, c, i)
- max_delta = max(max_delta,
- (u16) (clock->hand - ca->min_prio[rw]));
+ max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]);
- clock->min_prio = clock->hand - max_delta;
+ clock->max_last_io = max_last_io;
}
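
Note: max_last_bucket_io[] caches each device's maximum age, so after a
full scan of one device's buckets the global maximum is refreshed with a
cheap fold over member devices rather than another bucket scan:

    /* O(nr_devices), not O(nr_buckets): */
    for_each_member_device(ca, c, i)
            max_last_io = max(max_last_io, ca->max_last_bucket_io[rw]);
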
-static void bch2_rescale_prios(struct bch_fs *c, int rw)
+static void bch2_rescale_bucket_io_times(struct bch_fs *c, int rw)
{
- struct prio_clock *clock = &c->prio_clock[rw];
+ struct bucket_clock *clock = &c->bucket_clock[rw];
struct bucket_array *buckets;
struct bch_dev *ca;
struct bucket *g;
@@ -441,10 +437,10 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw)
buckets = bucket_array(ca);
for_each_bucket(g, buckets)
- g->prio[rw] = clock->hand -
- (clock->hand - g->prio[rw]) / 2;
+ g->io_time[rw] = clock->hand -
+ bucket_last_io(c, g, rw) / 2;
- bch2_recalc_min_prio(c, ca, rw);
+ bch2_recalc_oldest_io(c, ca, rw);
up_read(&ca->bucket_lock);
}
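
Note: rescaling now halves derived ages rather than distances from
min_prio; relative order among buckets is preserved while u16 headroom
doubles. Worked numbers (illustrative):

    u16 hand = 50000, io_time = 40000;
    u16 age  = hand - io_time;    /* 10000 */
    io_time  = hand - age / 2;    /* 45000: new age 5000, ordering unchanged */
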
@@ -452,19 +448,26 @@ static void bch2_rescale_prios(struct bch_fs *c, int rw)
static void bch2_inc_clock_hand(struct io_timer *timer)
{
- struct prio_clock *clock = container_of(timer,
- struct prio_clock, rescale);
+ struct bucket_clock *clock = container_of(timer,
+ struct bucket_clock, rescale);
struct bch_fs *c = container_of(clock,
- struct bch_fs, prio_clock[clock->rw]);
+ struct bch_fs, bucket_clock[clock->rw]);
+ struct bch_dev *ca;
u64 capacity;
+ unsigned i;
mutex_lock(&clock->lock);
- clock->hand++;
-
/* if clock cannot be advanced more, rescale prio */
- if (clock->hand == (u16) (clock->min_prio - 1))
- bch2_rescale_prios(c, clock->rw);
+ if (clock->max_last_io >= U16_MAX - 2)
+ bch2_rescale_bucket_io_times(c, clock->rw);
+
+ BUG_ON(clock->max_last_io >= U16_MAX - 2);
+
+ for_each_member_device(ca, c, i)
+ ca->max_last_bucket_io[clock->rw]++;
+ clock->max_last_io++;
+ clock->hand++;
mutex_unlock(&clock->lock);
@@ -486,9 +489,9 @@ static void bch2_inc_clock_hand(struct io_timer *timer)
bch2_io_timer_add(&c->io_clock[clock->rw], timer);
}
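
Note: advancing the hand ages every bucket by one tick, which is why the
cached per-device and global maxima are bumped inside the same critical
section. The U16_MAX - 2 threshold (rather than U16_MAX) plausibly leaves
slack for those post-check increments, though the patch does not say so.
Condensed from the hunk above (runs under clock->lock):

    if (clock->max_last_io >= U16_MAX - 2)        /* out of u16 headroom?   */
            bch2_rescale_bucket_io_times(c, clock->rw);
    for_each_member_device(ca, c, i)
            ca->max_last_bucket_io[clock->rw]++;  /* every age grows a tick */
    clock->max_last_io++;
    clock->hand++;
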
-static void bch2_prio_timer_init(struct bch_fs *c, int rw)
+static void bch2_bucket_clock_init(struct bch_fs *c, int rw)
{
- struct prio_clock *clock = &c->prio_clock[rw];
+ struct bucket_clock *clock = &c->bucket_clock[rw];
clock->hand = 1;
clock->rw = rw;
@@ -637,13 +640,14 @@ static void bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
static unsigned long bucket_sort_key(struct bch_fs *c, struct bch_dev *ca,
size_t b, struct bucket_mark m)
{
+ unsigned last_io = bucket_last_io(c, bucket(ca, b), READ);
+ unsigned max_last_io = ca->max_last_bucket_io[READ];
+
/*
* Time since last read, scaled to [0, 8) where larger value indicates
* more recently read data:
*/
- unsigned long hotness =
- (bucket(ca, b)->prio[READ] - ca->min_prio[READ]) * 7 /
- (c->prio_clock[READ].hand - ca->min_prio[READ]);
+ unsigned long hotness = (max_last_io - last_io) * 7 / max_last_io;
/* How much we want to keep the data in this bucket: */
unsigned long data_wantness =
@@ -674,12 +678,12 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
ca->alloc_heap.used = 0;
- mutex_lock(&c->prio_clock[READ].lock);
+ mutex_lock(&c->bucket_clock[READ].lock);
down_read(&ca->bucket_lock);
buckets = bucket_array(ca);
- bch2_recalc_min_prio(c, ca, READ);
+ bch2_recalc_oldest_io(c, ca, READ);
/*
* Find buckets with lowest read priority, by building a maxheap sorted
@@ -713,7 +717,7 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
heap_add_or_replace(&ca->alloc_heap, e, -bucket_alloc_cmp);
up_read(&ca->bucket_lock);
- mutex_unlock(&c->prio_clock[READ].lock);
+ mutex_unlock(&c->bucket_clock[READ].lock);
heap_resort(&ca->alloc_heap, bucket_alloc_cmp);
@@ -1818,14 +1822,14 @@ void bch2_recalc_capacity(struct bch_fs *c)
if (c->capacity) {
bch2_io_timer_add(&c->io_clock[READ],
- &c->prio_clock[READ].rescale);
+ &c->bucket_clock[READ].rescale);
bch2_io_timer_add(&c->io_clock[WRITE],
- &c->prio_clock[WRITE].rescale);
+ &c->bucket_clock[WRITE].rescale);
} else {
bch2_io_timer_del(&c->io_clock[READ],
- &c->prio_clock[READ].rescale);
+ &c->bucket_clock[READ].rescale);
bch2_io_timer_del(&c->io_clock[WRITE],
- &c->prio_clock[WRITE].rescale);
+ &c->bucket_clock[WRITE].rescale);
}
/* Wake up case someone was waiting for buckets */
@@ -2191,8 +2195,8 @@ void bch2_fs_allocator_init(struct bch_fs *c)
mutex_init(&c->write_points_hash_lock);
spin_lock_init(&c->freelist_lock);
- bch2_prio_timer_init(c, READ);
- bch2_prio_timer_init(c, WRITE);
+ bch2_bucket_clock_init(c, READ);
+ bch2_bucket_clock_init(c, WRITE);
/* open bucket 0 is a sentinal NULL: */
spin_lock_init(&c->open_buckets[0].lock);
diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h
index bee1e5a35778..8a71a37637de 100644
--- a/fs/bcachefs/alloc_types.h
+++ b/fs/bcachefs/alloc_types.h
@@ -8,7 +8,7 @@
#include "fifo.h"
/* There's two of these clocks, one for reads and one for writes: */
-struct prio_clock {
+struct bucket_clock {
/*
* "now" in (read/write) IO time - incremented whenever we do X amount
* of reads or writes.
@@ -23,7 +23,7 @@ struct prio_clock {
* consistent.
*/
u16 hand;
- u16 min_prio;
+ u16 max_last_io;
int rw;
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 0cda0975301e..9c4e7fdecbc7 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -392,7 +392,7 @@ struct bch_dev {
size_t fifo_last_bucket;
/* last calculated minimum prio */
- u16 min_prio[2];
+ u16 max_last_bucket_io[2];
atomic_long_t saturated_count;
size_t inc_gen_needs_gc;
@@ -596,7 +596,7 @@ struct bch_fs {
* those together consistently we keep track of the smallest nonzero
* priority of any bucket.
*/
- struct prio_clock prio_clock[2];
+ struct bucket_clock bucket_clock[2];
struct io_clock io_clock[2];
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index fda7fd704ae8..8dbd9152aec8 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -47,7 +47,12 @@ static inline struct bucket *bucket(struct bch_dev *ca, size_t b)
static inline void bucket_io_clock_reset(struct bch_fs *c, struct bch_dev *ca,
size_t b, int rw)
{
- bucket(ca, b)->prio[rw] = c->prio_clock[rw].hand;
+ bucket(ca, b)->io_time[rw] = c->bucket_clock[rw].hand;
+}
+
+static inline u16 bucket_last_io(struct bch_fs *c, struct bucket *g, int rw)
+{
+ return c->bucket_clock[rw].hand - g->io_time[rw];
}
/*
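
Note: bucket_last_io() relies on well-defined unsigned wraparound, so ages
stay correct even after the 16-bit hand overflows. A minimal demonstration
(values illustrative):

    u16 hand = 10, io_time = 65530;  /* hand has wrapped since the last IO */
    u16 age  = hand - io_time;       /* 16: the true age, not negative     */
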
diff --git a/fs/bcachefs/buckets_types.h b/fs/bcachefs/buckets_types.h
index a0256e13618c..28bd2c596477 100644
--- a/fs/bcachefs/buckets_types.h
+++ b/fs/bcachefs/buckets_types.h
@@ -31,12 +31,12 @@ struct bucket_mark {
};
struct bucket {
- u16 prio[2];
-
union {
struct bucket_mark _mark;
const struct bucket_mark mark;
};
+
+ u16 io_time[2];
};
struct bucket_array {
@@ -85,8 +85,9 @@ struct disk_reservation {
};
struct copygc_heap_entry {
+ u8 gen;
+ u32 sectors;
u64 offset;
- struct bucket_mark mark;
};
typedef HEAP(struct copygc_heap_entry) copygc_heap;
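
Note: copygc_heap_entry now captures just the generation and a used-sectors
snapshot at heap-build time instead of a whole struct bucket_mark; the final
accounting pass in movinggc.c below re-reads the live mark and only trusts
the snapshot if the generation still matches:

    struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
    if (i->gen == m.gen && bucket_sectors_used(m))  /* bucket not yet reused */
            sectors_not_moved += bucket_sectors_used(m);
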
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index ebbed8567c1f..9d19031ccfa9 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -2321,8 +2321,8 @@ static void journal_write(struct closure *cl)
journal_write_compact(jset);
- jset->read_clock = cpu_to_le16(c->prio_clock[READ].hand);
- jset->write_clock = cpu_to_le16(c->prio_clock[WRITE].hand);
+ jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand);
+ jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand);
jset->magic = cpu_to_le64(jset_magic(c));
jset->version = cpu_to_le32(BCACHE_JSET_VERSION);
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 75113d6c6647..28dabca74565 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -52,7 +52,7 @@ static inline int sectors_used_cmp(copygc_heap *heap,
struct copygc_heap_entry l,
struct copygc_heap_entry r)
{
- return bucket_sectors_used(l.mark) - bucket_sectors_used(r.mark);
+ return (l.sectors > r.sectors) - (l.sectors < r.sectors);
}
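
Note: the old comparator returned the difference of two unsigned sector
counts, and converting that difference to int can report the wrong sign once
values exceed INT_MAX; (l > r) - (l < r) always yields a clean -1/0/1. An
illustration, assuming 32-bit int on a two's-complement ABI:

    u32 l = 0x80000000u, r = 0;
    int broken = (int)(l - r);       /* INT_MIN: negative although l > r */
    int safe   = (l > r) - (l < r);  /* 1: correct three-way result      */
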
static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
@@ -79,7 +79,7 @@ static bool __copygc_pred(struct bch_dev *ca,
return (i >= 0 &&
ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
- ptr->gen == h->data[i].mark.gen);
+ ptr->gen == h->data[i].gen);
}
return false;
@@ -155,8 +155,9 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
continue;
e = (struct copygc_heap_entry) {
- .offset = bucket_to_sector(ca, b),
- .mark = m
+ .gen = m.gen,
+ .sectors = bucket_sectors_used(m),
+ .offset = bucket_to_sector(ca, b),
};
heap_add_or_replace(h, e, -sectors_used_cmp);
}
@@ -164,11 +165,11 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
up_read(&c->gc_lock);
for (i = h->data; i < h->data + h->used; i++)
- sectors_to_move += bucket_sectors_used(i->mark);
+ sectors_to_move += i->sectors;
while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
BUG_ON(!heap_pop(h, e, -sectors_used_cmp));
- sectors_to_move -= bucket_sectors_used(e.mark);
+ sectors_to_move -= e.sectors;
}
buckets_to_move = h->used;
@@ -192,7 +193,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
size_t b = sector_to_bucket(ca, i->offset);
struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
- if (i->mark.gen == m.gen && bucket_sectors_used(m)) {
+ if (i->gen == m.gen && bucket_sectors_used(m)) {
sectors_not_moved += bucket_sectors_used(m);
buckets_not_moved++;
}
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index f88a313f72ae..92cf630fcf8b 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -726,8 +726,8 @@ const char *bch2_fs_start(struct bch_fs *c)
j = &list_entry(journal.prev, struct journal_replay, list)->j;
- c->prio_clock[READ].hand = le16_to_cpu(j->read_clock);
- c->prio_clock[WRITE].hand = le16_to_cpu(j->write_clock);
+ c->bucket_clock[READ].hand = le16_to_cpu(j->read_clock);
+ c->bucket_clock[WRITE].hand = le16_to_cpu(j->write_clock);
for (i = 0; i < BTREE_ID_NR; i++) {
unsigned level;
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index c440eca4fb87..f796f7729a80 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -142,10 +142,10 @@ read_attribute(first_bucket);
read_attribute(nbuckets);
read_attribute(durability);
read_attribute(iostats);
-read_attribute(read_priority_stats);
-read_attribute(write_priority_stats);
-read_attribute(fragmentation_stats);
-read_attribute(oldest_gen_stats);
+read_attribute(last_read_quantiles);
+read_attribute(last_write_quantiles);
+read_attribute(fragmentation_quantiles);
+read_attribute(oldest_gen_quantiles);
read_attribute(reserve_stats);
read_attribute(btree_cache_size);
read_attribute(compression_stats);
@@ -623,36 +623,41 @@ struct attribute *bch2_fs_time_stats_files[] = {
NULL
};
-typedef unsigned (bucket_map_fn)(struct bch_dev *, size_t, void *);
+typedef unsigned (bucket_map_fn)(struct bch_fs *, struct bch_dev *,
+ size_t, void *);
-static unsigned bucket_priority_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_last_io_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private)
{
- struct bucket *g = bucket(ca, b);
int rw = (private ? 1 : 0);
- return ca->fs->prio_clock[rw].hand - g->prio[rw];
+ return bucket_last_io(c, bucket(ca, b), rw);
}
-static unsigned bucket_sectors_used_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_sectors_used_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private)
{
struct bucket *g = bucket(ca, b);
return bucket_sectors_used(g->mark);
}
-static unsigned bucket_oldest_gen_fn(struct bch_dev *ca, size_t b,
- void *private)
+static unsigned bucket_oldest_gen_fn(struct bch_fs *c, struct bch_dev *ca,
+ size_t b, void *private)
{
return bucket_gc_gen(ca, b);
}
-static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
- bucket_map_fn *fn, void *private)
+static int unsigned_cmp(const void *_l, const void *_r)
{
- int cmp(const void *l, const void *r)
- { return *((unsigned *) r) - *((unsigned *) l); }
+ unsigned l = *((unsigned *) _l);
+ unsigned r = *((unsigned *) _r);
+
+ return (l > r) - (l < r);
+}
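
Note: the old comparator was a GNU C nested function; taking its address
makes GCC emit a stack trampoline (which needs an executable stack), and
clang does not support the extension at all, so a file-scope comparator is
the portable replacement. The sort direction also flips from descending
(r - l) to ascending; the quantile selection that consumes the sorted array
lies outside the visible hunk.

    /* portable call, no trampoline required: */
    sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
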
+static ssize_t show_quantiles(struct bch_fs *c, struct bch_dev *ca,
+ char *buf, bucket_map_fn *fn, void *private)
+{
size_t i, n;
/* Compute 31 quantiles */
unsigned q[31], *p;
@@ -668,9 +673,9 @@ static ssize_t show_quantiles(struct bch_dev *ca, char *buf,
}
for (i = ca->mi.first_bucket; i < n; i++)
- p[i] = fn(ca, i, private);
+ p[i] = fn(c, ca, i, private);
- sort(p, n, sizeof(unsigned), cmp, NULL);
+ sort(p, n, sizeof(unsigned), unsigned_cmp, NULL);
up_read(&ca->bucket_lock);
while (n &&
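
Note: the visible part of show_quantiles() samples every bucket through fn()
into p[] and sorts it ascending; selecting the 31 quantiles happens past the
end of the hunk. A hypothetical selection, for illustration only (not the
in-tree loop):

    for (i = 0; i < 31; i++)
            q[i] = p[(size_t) n * (i + 1) / 32];  /* evenly spaced order stats */
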
@@ -854,14 +859,16 @@ SHOW(bch2_dev)
if (attr == &sysfs_iostats)
return show_dev_iostats(ca, buf);
- if (attr == &sysfs_read_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 0);
- if (attr == &sysfs_write_priority_stats)
- return show_quantiles(ca, buf, bucket_priority_fn, (void *) 1);
- if (attr == &sysfs_fragmentation_stats)
- return show_quantiles(ca, buf, bucket_sectors_used_fn, NULL);
- if (attr == &sysfs_oldest_gen_stats)
- return show_quantiles(ca, buf, bucket_oldest_gen_fn, NULL);
+
+ if (attr == &sysfs_last_read_quantiles)
+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 0);
+ if (attr == &sysfs_last_write_quantiles)
+ return show_quantiles(c, ca, buf, bucket_last_io_fn, (void *) 1);
+ if (attr == &sysfs_fragmentation_quantiles)
+ return show_quantiles(c, ca, buf, bucket_sectors_used_fn, NULL);
+ if (attr == &sysfs_oldest_gen_quantiles)
+ return show_quantiles(c, ca, buf, bucket_oldest_gen_fn, NULL);
+
if (attr == &sysfs_reserve_stats)
return show_reserve_stats(ca, buf);
if (attr == &sysfs_alloc_debug)
@@ -946,10 +953,10 @@ struct attribute *bch2_dev_files[] = {
&sysfs_iostats,
/* alloc info - other stats: */
- &sysfs_read_priority_stats,
- &sysfs_write_priority_stats,
- &sysfs_fragmentation_stats,
- &sysfs_oldest_gen_stats,
+ &sysfs_last_read_quantiles,
+ &sysfs_last_write_quantiles,
+ &sysfs_fragmentation_quantiles,
+ &sysfs_oldest_gen_quantiles,
&sysfs_reserve_stats,
/* debug: */