Diffstat (limited to 'libbcache/journal.c')
-rw-r--r--	libbcache/journal.c	213
1 file changed, 134 insertions, 79 deletions
diff --git a/libbcache/journal.c b/libbcache/journal.c
index e50d4085..585d1205 100644
--- a/libbcache/journal.c
+++ b/libbcache/journal.c
@@ -99,7 +99,7 @@ static struct jset_entry *bch_journal_find_entry(struct jset *j, unsigned type,
 	return NULL;
 }
 
-struct bkey_i *bch_journal_find_btree_root(struct cache_set *c, struct jset *j,
+struct bkey_i *bch_journal_find_btree_root(struct bch_fs *c, struct jset *j,
 					   enum btree_id id, unsigned *level)
 {
 	struct bkey_i *k;
@@ -140,8 +140,8 @@ static inline void bch_journal_add_prios(struct journal *j,
 static void journal_seq_blacklist_flush(struct journal *j,
 					struct journal_entry_pin *pin)
 {
-	struct cache_set *c =
-		container_of(j, struct cache_set, journal);
+	struct bch_fs *c =
+		container_of(j, struct bch_fs, journal);
 	struct journal_seq_blacklist *bl =
 		container_of(pin, struct journal_seq_blacklist, pin);
 	struct blacklisted_node n;
@@ -270,7 +270,7 @@ bch_journal_seq_blacklisted_new(struct journal *j, u64 seq)
  * as blacklisted so that on future restarts the corresponding data will still
  * be ignored:
  */
-int bch_journal_seq_should_ignore(struct cache_set *c, u64 seq, struct btree *b)
+int bch_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
 {
 	struct journal *j = &c->journal;
 	struct journal_seq_blacklist *bl = NULL;
@@ -357,7 +357,7 @@ out:
 /*
  * Journal replay/recovery:
  *
- * This code is all driven from run_cache_set(); we first read the journal
+ * This code is all driven from bch_fs_start(); we first read the journal
  * entries, do some other stuff, then we mark all the keys in the journal
  * entries (same as garbage collection would), then we replay them - reinserting
  * them into the cache in precisely the same order as they appear in the
@@ -381,7 +381,7 @@ struct journal_list {
  * Given a journal entry we just read, add it to the list of journal entries to
  * be replayed:
  */
-static int journal_entry_add(struct cache_set *c, struct journal_list *jlist,
+static int journal_entry_add(struct bch_fs *c, struct journal_list *jlist,
 			     struct jset *j)
 {
 	struct journal_replay *i, *pos;
@@ -469,7 +469,7 @@ static void journal_entry_null_range(void *start, void *end)
 	}
 }
 
-static int journal_validate_key(struct cache_set *c, struct jset *j,
+static int journal_validate_key(struct bch_fs *c, struct jset *j,
 				struct jset_entry *entry,
 				struct bkey_i *k, enum bkey_type key_type,
 				const char *type)
@@ -526,7 +526,7 @@ fsck_err:
 #define JOURNAL_ENTRY_NONE	6
 #define JOURNAL_ENTRY_BAD	7
 
-static int journal_entry_validate(struct cache_set *c,
+static int journal_entry_validate(struct bch_fs *c,
 				  struct jset *j, u64 sector,
 				  unsigned bucket_sectors_left,
 				  unsigned sectors_read)
@@ -659,12 +659,12 @@ static int journal_read_buf_realloc(struct journal_read_buf *b,
 	return 0;
 }
 
-static int journal_read_bucket(struct cache *ca,
+static int journal_read_bucket(struct bch_dev *ca,
 			       struct journal_read_buf *buf,
 			       struct journal_list *jlist,
 			       unsigned bucket, u64 *seq,
 			       bool *entries_found)
 {
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct journal_device *ja = &ca->journal;
 	struct bio *bio = ja->bio;
 	struct jset *j = NULL;
@@ -776,7 +776,7 @@ static void bch_journal_read_device(struct closure *cl)
 	struct journal_device *ja =
 		container_of(cl, struct journal_device, read);
-	struct cache *ca = container_of(ja, struct cache, journal);
+	struct bch_dev *ca = container_of(ja, struct bch_dev, journal);
 	struct journal_list *jlist =
 		container_of(cl->parent, struct journal_list, cl);
 	struct request_queue *q = bdev_get_queue(ca->disk_sb.bdev);
@@ -897,6 +897,7 @@ search_done:
 		break;
 out:
 	free_pages((unsigned long) buf.data, get_order(buf.size));
+	percpu_ref_put(&ca->io_ref);
 	closure_return(cl);
 err:
 	mutex_lock(&jlist->lock);
@@ -921,7 +922,7 @@ static int journal_seq_blacklist_read(struct journal *j,
 				      struct journal_replay *i,
 				      struct journal_entry_pin_list *p)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct jset_entry *entry;
 	struct journal_seq_blacklist *bl;
 	u64 seq;
@@ -957,14 +958,14 @@ static inline bool journal_has_keys(struct list_head *list)
 	return false;
 }
 
-int bch_journal_read(struct cache_set *c, struct list_head *list)
+int bch_journal_read(struct bch_fs *c, struct list_head *list)
 {
 	struct jset_entry *prio_ptrs;
 	struct journal_list jlist;
 	struct journal_replay *i;
 	struct jset *j;
 	struct journal_entry_pin_list *p;
-	struct cache *ca;
+	struct bch_dev *ca;
 	u64 cur_seq, end_seq;
 	unsigned iter;
 	int ret = 0;
@@ -974,11 +975,13 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 	jlist.head = list;
 	jlist.ret = 0;
 
-	for_each_cache(ca, c, iter)
+	for_each_readable_member(ca, c, iter) {
+		percpu_ref_get(&ca->io_ref);
 		closure_call(&ca->journal.read,
 			     bch_journal_read_device,
 			     system_unbound_wq,
 			     &jlist.cl);
+	}
 
 	closure_sync(&jlist.cl);
@@ -1074,7 +1077,7 @@ fsck_err:
 	return ret;
 }
 
-void bch_journal_mark(struct cache_set *c, struct list_head *list)
+void bch_journal_mark(struct bch_fs *c, struct list_head *list)
 {
 	struct bkey_i *k, *n;
 	struct jset_entry *j;
@@ -1097,7 +1100,7 @@ static bool journal_entry_is_open(struct journal *j)
 
 void bch_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 
 	if (!need_write_just_set &&
 	    test_bit(JOURNAL_NEED_WRITE, &j->flags))
@@ -1161,7 +1164,7 @@ static enum {
 	JOURNAL_UNLOCKED,
 } journal_buf_switch(struct journal *j, bool need_write_just_set)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *buf;
 	union journal_res_state old, new;
 	u64 v = atomic64_read(&j->reservations.counter);
@@ -1244,7 +1247,7 @@ void bch_journal_halt(struct journal *j)
 }
 
 static unsigned journal_dev_buckets_available(struct journal *j,
-					      struct cache *ca)
+					      struct bch_dev *ca)
 {
 	struct journal_device *ja = &ca->journal;
 	unsigned next = (ja->cur_idx + 1) % ja->nr;
@@ -1277,16 +1280,16 @@ static unsigned journal_dev_buckets_available(struct journal *j,
 /* returns number of sectors available for next journal entry: */
 static int journal_entry_sectors(struct journal *j)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
-	struct cache *ca;
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	struct bch_dev *ca;
 	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
 	unsigned sectors_available = j->entry_size_max >> 9;
 	unsigned i, nr_online = 0, nr_devs = 0;
 
 	lockdep_assert_held(&j->lock);
 
-	rcu_read_lock();
-	group_for_each_cache_rcu(ca, &j->devs, i) {
+	spin_lock(&j->devs.lock);
+	group_for_each_dev(ca, &j->devs, i) {
 		unsigned buckets_required = 0;
 
 		sectors_available = min_t(unsigned, sectors_available,
@@ -1317,7 +1320,7 @@ static int journal_entry_sectors(struct journal *j)
 		nr_devs++;
 		nr_online++;
 	}
-	rcu_read_unlock();
+	spin_unlock(&j->devs.lock);
 
 	if (nr_online < c->opts.metadata_replicas_required)
 		return -EROFS;
@@ -1401,7 +1404,7 @@ static int journal_entry_open(struct journal *j)
 	return ret;
 }
 
-void bch_journal_start(struct cache_set *c)
+void bch_journal_start(struct bch_fs *c)
 {
 	struct journal *j = &c->journal;
 	struct journal_seq_blacklist *bl;
@@ -1455,7 +1458,7 @@ void bch_journal_start(struct cache_set *c)
 	queue_delayed_work(system_freezable_wq, &j->reclaim_work, 0);
 }
 
-int bch_journal_replay(struct cache_set *c, struct list_head *list)
+int bch_journal_replay(struct bch_fs *c, struct list_head *list)
 {
 	int ret = 0, keys = 0, entries = 0;
 	struct journal *j = &c->journal;
@@ -1527,8 +1530,13 @@ err:
 	return ret;
 }
 
-static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
-				      unsigned nr, bool write_super)
+#if 0
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+static int bch_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+				      unsigned nr)
 {
 	struct journal *j = &c->journal;
 	struct journal_device *ja = &ca->journal;
@@ -1615,8 +1623,7 @@ static int bch_set_nr_journal_buckets(struct cache_set *c, struct cache *ca,
 
 	BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
 
-	if (write_super)
-		bch_write_super(c);
+	bch_write_super(c);
 
 	ret = 0;
 err:
@@ -1628,9 +1635,15 @@ err:
 	return ret;
 }
+#endif
 
-int bch_dev_journal_alloc(struct cache *ca)
+int bch_dev_journal_alloc(struct bch_dev *ca)
 {
+	struct journal_device *ja = &ca->journal;
+	struct bch_sb_field_journal *journal_buckets;
+	unsigned i, nr;
+	u64 b, *p;
+
 	if (dynamic_fault("bcache:add:journal_alloc"))
 		return -ENOMEM;
@@ -1638,12 +1651,50 @@ int bch_dev_journal_alloc(struct cache *ca)
 	 * clamp journal size to 1024 buckets or 512MB (in sectors), whichever
 	 * is smaller:
 	 */
-	return bch_set_nr_journal_buckets(ca->set, ca,
-			clamp_t(unsigned, ca->mi.nbuckets >> 8,
-				BCH_JOURNAL_BUCKETS_MIN,
-				min(1 << 10,
-				    (1 << 20) / ca->mi.bucket_size)),
-			false);
+	nr = clamp_t(unsigned, ca->mi.nbuckets >> 8,
+		     BCH_JOURNAL_BUCKETS_MIN,
+		     min(1 << 10,
			 (1 << 20) / ca->mi.bucket_size));
+
+	p = krealloc(ja->bucket_seq, nr * sizeof(u64),
+		     GFP_KERNEL|__GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	ja->bucket_seq = p;
+
+	p = krealloc(ja->buckets, nr * sizeof(u64),
+		     GFP_KERNEL|__GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	ja->buckets = p;
+
+	journal_buckets = bch_sb_resize_journal(&ca->disk_sb,
+				nr + sizeof(*journal_buckets) / sizeof(u64));
+	if (!journal_buckets)
+		return -ENOMEM;
+
+	for (i = 0, b = ca->mi.first_bucket;
+	     i < nr && b < ca->mi.nbuckets; b++) {
+		if (!is_available_bucket(ca->buckets[b].mark))
+			continue;
+
+		bch_mark_metadata_bucket(ca, &ca->buckets[b],
+					 BUCKET_JOURNAL, true);
+		ja->buckets[i] = b;
+		journal_buckets->buckets[i] = cpu_to_le64(b);
+		i++;
+	}
+
+	if (i < nr)
+		return -ENOSPC;
+
+	BUG_ON(bch_validate_journal_layout(ca->disk_sb.sb, ca->mi));
+
+	ja->nr = nr;
+
+	return 0;
 }
 
 /* Journalling */
@@ -1833,8 +1884,9 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
 	bool ret;
 
 	spin_lock(&j->lock);
-	ret = (ja->last_idx != ja->cur_idx &&
-	       ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk);
+	ret = ja->nr &&
+	      (ja->last_idx != ja->cur_idx &&
+	       ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk);
 	spin_unlock(&j->lock);
 
 	return ret;
@@ -1860,10 +1912,10 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
  */
 static void journal_reclaim_work(struct work_struct *work)
 {
-	struct cache_set *c = container_of(to_delayed_work(work),
-				struct cache_set, journal.reclaim_work);
+	struct bch_fs *c = container_of(to_delayed_work(work),
+				struct bch_fs, journal.reclaim_work);
 	struct journal *j = &c->journal;
-	struct cache *ca;
+	struct bch_dev *ca;
 	struct journal_entry_pin *pin;
 	u64 seq_to_flush = 0;
 	unsigned iter, bucket_to_flush;
@@ -1874,9 +1926,12 @@ static void journal_reclaim_work(struct work_struct *work)
 	 * Advance last_idx to point to the oldest journal entry containing
 	 * btree node updates that have not yet been written out
 	 */
-	group_for_each_cache(ca, &j->devs, iter) {
+	for_each_rw_member(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;
 
+		if (!ja->nr)
+			continue;
+
 		while (should_discard_bucket(j, ja)) {
 			if (!reclaim_lock_held) {
 				/*
@@ -1954,17 +2009,16 @@ static void journal_reclaim_work(struct work_struct *work)
  */
 static int journal_write_alloc(struct journal *j, unsigned sectors)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
 	struct bch_extent_ptr *ptr;
 	struct journal_device *ja;
-	struct cache *ca;
+	struct bch_dev *ca;
 	bool swapped;
 	unsigned i, replicas, replicas_want =
 		READ_ONCE(c->opts.metadata_replicas);
 
 	spin_lock(&j->lock);
-	rcu_read_lock();
 
 	/*
 	 * Drop any pointers to devices that have been removed, are no longer
@@ -1975,13 +2029,15 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 	 * entry - that's why we drop pointers to devices <= current free space,
 	 * i.e. whichever device was limiting the current journal entry size.
 	 */
-	extent_for_each_ptr_backwards(e, ptr)
-		if (!(ca = PTR_CACHE(c, ptr)) ||
-		    ca->mi.state != BCH_MEMBER_STATE_ACTIVE ||
+	extent_for_each_ptr_backwards(e, ptr) {
+		ca = c->devs[ptr->dev];
+
+		if (ca->mi.state != BCH_MEMBER_STATE_RW ||
 		    ca->journal.sectors_free <= sectors)
 			__bch_extent_drop_ptr(e, ptr);
 		else
 			ca->journal.sectors_free -= sectors;
+	}
 
 	replicas = bch_extent_nr_ptrs(e.c);
@@ -2003,8 +2059,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 	 * Pick devices for next journal write:
 	 * XXX: sort devices by free journal space?
 	 */
-	for (i = 0; i < j->devs.nr; i++) {
-		ca = j->devs.d[i].dev;
+	group_for_each_dev(ca, &j->devs, i) {
 		ja = &ca->journal;
 
 		if (replicas >= replicas_want)
@@ -2034,7 +2089,6 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 		trace_bcache_journal_next_bucket(ca, ja->cur_idx, ja->last_idx);
 	}
 	spin_unlock(&j->devs.lock);
-	rcu_read_unlock();
 
 	j->prev_buf_sectors = 0;
 	spin_unlock(&j->lock);
@@ -2092,15 +2146,15 @@ static void journal_write_compact(struct jset *jset)
 
 static void journal_write_endio(struct bio *bio)
 {
-	struct cache *ca = bio->bi_private;
-	struct journal *j = &ca->set->journal;
+	struct bch_dev *ca = bio->bi_private;
+	struct journal *j = &ca->fs->journal;
 
 	if (bch_dev_fatal_io_err_on(bio->bi_error, ca, "journal write") ||
 	    bch_meta_write_fault("journal"))
 		bch_journal_halt(j);
 
 	closure_put(&j->io);
-	percpu_ref_put(&ca->ref);
+	percpu_ref_put(&ca->io_ref);
 }
 
 static void journal_write_done(struct closure *cl)
@@ -2144,8 +2198,8 @@ static void journal_write_done(struct closure *cl)
 static void journal_write(struct closure *cl)
 {
 	struct journal *j = container_of(cl, struct journal, io);
-	struct cache_set *c = container_of(j, struct cache_set, journal);
-	struct cache *ca;
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	struct bch_dev *ca;
 	struct journal_buf *w = journal_prev_buf(j);
 	struct jset *jset = w->data;
 	struct bio *bio;
@@ -2205,13 +2259,8 @@ static void journal_write(struct closure *cl)
 		goto no_io;
 
 	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
-		rcu_read_lock();
-		ca = PTR_CACHE(c, ptr);
-		if (ca)
-			percpu_ref_get(&ca->ref);
-		rcu_read_unlock();
-
-		if (!ca) {
+		ca = c->devs[ptr->dev];
+		if (!percpu_ref_tryget(&ca->io_ref)) {
			/* XXX: fix this */
			bch_err(c, "missing device for journal write\n");
			continue;
@@ -2236,11 +2285,10 @@ static void journal_write(struct closure *cl)
 		ca->journal.bucket_seq[ca->journal.cur_idx] =
			le64_to_cpu(w->data->seq);
 	}
 
-	for_each_cache(ca, c, i)
-		if (ca->mi.state == BCH_MEMBER_STATE_ACTIVE &&
-		    journal_flushes_device(ca) &&
+	for_each_rw_member(ca, c, i)
+		if (journal_flushes_device(ca) &&
 		    !bch_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
-			percpu_ref_get(&ca->ref);
+			percpu_ref_get(&ca->io_ref);
 
			bio = ca->journal.bio;
			bio_reset(bio);
@@ -2296,7 +2344,7 @@ u64 bch_inode_journal_seq(struct journal *j, u64 inode)
 static int __journal_res_get(struct journal *j, struct journal_res *res,
 			     unsigned u64s_min, unsigned u64s_max)
 {
-	struct cache_set *c = container_of(j, struct cache_set, journal);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	int ret;
 retry:
 	ret = journal_res_get_fast(j, res, u64s_min, u64s_max);
@@ -2552,7 +2600,7 @@ int bch_journal_flush(struct journal *j)
 ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 {
 	union journal_res_state *s = &j->reservations;
-	struct cache *ca;
+	struct bch_dev *ca;
 	unsigned iter;
 	ssize_t ret = 0;
@@ -2583,7 +2631,8 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 			 journal_entry_is_open(j),
 			 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
 
-	group_for_each_cache_rcu(ca, &j->devs, iter) {
+	spin_lock(&j->devs.lock);
+	group_for_each_dev(ca, &j->devs, iter) {
 		struct journal_device *ja = &ca->journal;
 
 		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
@@ -2595,6 +2644,7 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 			 ja->cur_idx,	ja->bucket_seq[ja->cur_idx],
 			 ja->last_idx,	ja->bucket_seq[ja->last_idx]);
 	}
+	spin_unlock(&j->devs.lock);
 
 	spin_unlock(&j->lock);
 	rcu_read_unlock();
@@ -2602,9 +2652,9 @@ ssize_t bch_journal_print_debug(struct journal *j, char *buf)
 	return ret;
 }
 
-static bool bch_journal_writing_to_device(struct cache *ca)
+static bool bch_journal_writing_to_device(struct bch_dev *ca)
 {
-	struct journal *j = &ca->set->journal;
+	struct journal *j = &ca->fs->journal;
 	bool ret;
 
 	spin_lock(&j->lock);
@@ -2627,11 +2677,11 @@ static bool bch_journal_writing_to_device(struct cache *ca)
  * writeable and pick a new set of devices to write to.
 */
 
-int bch_journal_move(struct cache *ca)
+int bch_journal_move(struct bch_dev *ca)
 {
 	u64 last_flushed_seq;
 	struct journal_device *ja = &ca->journal;
-	struct cache_set *c = ca->set;
+	struct bch_fs *c = ca->fs;
 	struct journal *j = &c->journal;
 	unsigned i;
 	int ret = 0;		/* Success */
@@ -2698,21 +2748,26 @@ void bch_fs_journal_stop(struct journal *j)
 	cancel_delayed_work_sync(&j->reclaim_work);
 }
 
-void bch_dev_journal_exit(struct cache *ca)
+void bch_dev_journal_exit(struct bch_dev *ca)
 {
+	kfree(ca->journal.bio);
 	kfree(ca->journal.buckets);
 	kfree(ca->journal.bucket_seq);
+
+	ca->journal.bio = NULL;
+	ca->journal.buckets = NULL;
+	ca->journal.bucket_seq = NULL;
 }
 
-int bch_dev_journal_init(struct cache *ca)
+int bch_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
 {
 	struct journal_device *ja = &ca->journal;
 	struct bch_sb_field_journal *journal_buckets =
-		bch_sb_get_journal(ca->disk_sb.sb);
+		bch_sb_get_journal(sb);
 	unsigned i, journal_entry_pages;
 
 	journal_entry_pages =
-		DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(ca->disk_sb.sb),
+		DIV_ROUND_UP(1U << BCH_SB_JOURNAL_ENTRY_SIZE(sb),
 			     PAGE_SECTORS);
 
 	ja->nr = bch_nr_journal_buckets(journal_buckets);
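
The recurring pattern in this diff is the replacement of RCU-protected PTR_CACHE()/ca->ref device lookups with direct c->devs[ptr->dev] indexing pinned by the new per-device io_ref: journal_write() takes a try-get on io_ref before submitting the bio, and journal_write_endio() drops it. Below is a minimal, self-contained user-space model of that pin/unpin pattern, for illustration only; the names dev_ref, dev_tryget and dev_put are invented here, and the refcount is deliberately simplified (not race-free), unlike the kernel's percpu_ref.

    /* Model of the device-pinning pattern this commit adopts. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct dev_ref {
            atomic_int  count;   /* outstanding I/O users */
            atomic_bool dying;   /* set once the device is being removed */
    };

    static bool dev_tryget(struct dev_ref *ref)
    {
            /* Simplified stand-in for percpu_ref_tryget(&ca->io_ref): */
            if (atomic_load(&ref->dying))
                    return false;
            atomic_fetch_add(&ref->count, 1);
            return true;
    }

    static void dev_put(struct dev_ref *ref)
    {
            /* In the kernel code this put happens in journal_write_endio(). */
            atomic_fetch_sub(&ref->count, 1);
    }

    int main(void)
    {
            struct dev_ref ref = { .count = 0, .dying = false };

            if (dev_tryget(&ref)) {
                    printf("device pinned, submitting journal write\n");
                    dev_put(&ref);
            } else {
                    printf("missing device for journal write\n");
            }
            return 0;
    }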