Diffstat (limited to 'libbcachefs/journal.c')
-rw-r--r-- | libbcachefs/journal.c | 329 |
1 files changed, 137 insertions, 192 deletions
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index ecae9b01..829e0648 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1629,8 +1629,7 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 		ja->nr++;
 		spin_unlock(&j->lock);
 
-		bch2_mark_metadata_bucket(c, ca, &ca->buckets[bucket],
-					  BCH_DATA_JOURNAL,
+		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
 					  ca->mi.bucket_size,
 					  gc_phase(GC_PHASE_SB), 0);
 
@@ -2021,10 +2020,11 @@ static void journal_reclaim_work(struct work_struct *work)
 /**
  * journal_next_bucket - move on to the next journal bucket if possible
  */
-static int journal_write_alloc(struct journal *j, unsigned sectors)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+			       unsigned sectors)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
+	struct bkey_s_extent e;
 	struct bch_extent_ptr *ptr;
 	struct journal_device *ja;
 	struct bch_dev *ca;
@@ -2033,6 +2033,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 		READ_ONCE(c->opts.metadata_replicas);
 
 	spin_lock(&j->lock);
+	e = bkey_i_to_s_extent(&j->key);
 
 	/*
 	 * Drop any pointers to devices that have been removed, are no longer
@@ -2098,6 +2099,8 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
 	rcu_read_unlock();
 
 	j->prev_buf_sectors = 0;
+
+	bkey_copy(&w->key, &j->key);
 	spin_unlock(&j->lock);
 
 	if (replicas < c->opts.metadata_replicas_required)
@@ -2173,13 +2176,26 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
 static void journal_write_done(struct closure *cl)
 {
 	struct journal *j = container_of(cl, struct journal, io);
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct journal_buf *w = journal_prev_buf(j);
+	struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&w->key);
 
+	if (!bch2_extent_nr_ptrs(e)) {
+		bch_err(c, "unable to write journal to sufficient devices");
+		goto err;
+	}
+
+	if (bch2_check_mark_super(c, e, BCH_DATA_JOURNAL))
+		goto err;
+out:
 	__bch2_time_stats_update(j->write_time, j->write_start_time);
 
 	spin_lock(&j->lock);
 	j->last_seq_ondisk = le64_to_cpu(w->data->last_seq);
 
+	journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs =
+		bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
+
 	/*
 	 * Updating last_seq_ondisk may let journal_reclaim_work() discard more
 	 * buckets:
@@ -2202,31 +2218,6 @@ static void journal_write_done(struct closure *cl)
 	if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
 		mod_delayed_work(system_freezable_wq, &j->write_work, 0);
 	spin_unlock(&j->lock);
-}
-
-static void journal_write_error(struct closure *cl)
-{
-	struct journal *j = container_of(cl, struct journal, io);
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
-
-	while (j->replicas_failed) {
-		unsigned idx = __fls(j->replicas_failed);
-
-		bch2_extent_drop_ptr_idx(e, idx);
-		j->replicas_failed ^= 1 << idx;
-	}
-
-	if (!bch2_extent_nr_ptrs(e.c)) {
-		bch_err(c, "unable to write journal to sufficient devices");
-		goto err;
-	}
-
-	if (bch2_check_mark_super(c, e.c, BCH_DATA_JOURNAL))
-		goto err;
-
-out:
-	journal_write_done(cl);
 	return;
 err:
 	bch2_fatal_error(c);
@@ -2241,12 +2232,12 @@ static void journal_write_endio(struct bio *bio)
 
 	if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write") ||
 	    bch2_meta_write_fault("journal")) {
-		/* Was this a flush or an actual journal write? */
-		if (ca->journal.ptr_idx != U8_MAX) {
-			set_bit(ca->journal.ptr_idx, &j->replicas_failed);
-			set_closure_fn(&j->io, journal_write_error,
-				       system_highpri_wq);
-		}
+		struct journal_buf *w = journal_prev_buf(j);
+		unsigned long flags;
+
+		spin_lock_irqsave(&j->err_lock, flags);
+		bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
+		spin_unlock_irqrestore(&j->err_lock, flags);
 	}
 
 	closure_put(&j->io);
@@ -2262,7 +2253,7 @@ static void journal_write(struct closure *cl)
 	struct jset *jset;
 	struct bio *bio;
 	struct bch_extent_ptr *ptr;
-	unsigned i, sectors, bytes, ptr_idx = 0;
+	unsigned i, sectors, bytes;
 
 	journal_buf_realloc(j, w);
 	jset = w->data;
@@ -2309,20 +2300,13 @@ static void journal_write(struct closure *cl)
 	bytes = vstruct_bytes(w->data);
 	memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
 
-	if (journal_write_alloc(j, sectors)) {
+	if (journal_write_alloc(j, w, sectors)) {
 		bch2_journal_halt(j);
 		bch_err(c, "Unable to allocate journal write");
 		bch2_fatal_error(c);
 		continue_at(cl, journal_write_done, system_highpri_wq);
 	}
 
-	if (bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
-				  BCH_DATA_JOURNAL))
-		goto err;
-
-	journal_seq_pin(j, le64_to_cpu(jset->seq))->devs =
-		bch2_extent_devs(bkey_i_to_s_c_extent(&j->key));
-
 	/*
 	 * XXX: we really should just disable the entire journal in nochanges
 	 * mode
@@ -2330,7 +2314,7 @@ static void journal_write(struct closure *cl)
 	if (c->opts.nochanges)
 		goto no_io;
 
-	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
+	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
 		ca = bch_dev_bkey_exists(c, ptr->dev);
 		if (!percpu_ref_tryget(&ca->io_ref)) {
 			/* XXX: fix this */
@@ -2341,7 +2325,6 @@ static void journal_write(struct closure *cl)
 		this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_JOURNAL],
 			     sectors);
 
-		ca->journal.ptr_idx = ptr_idx++;
 		bio = ca->journal.bio;
 		bio_reset(bio);
 		bio->bi_iter.bi_sector = ptr->offset;
@@ -2361,10 +2344,9 @@ static void journal_write(struct closure *cl)
 
 	for_each_rw_member(ca, c, i)
 		if (journal_flushes_device(ca) &&
-		    !bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
+		    !bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), i)) {
 			percpu_ref_get(&ca->io_ref);
 
-			ca->journal.ptr_idx = U8_MAX;
 			bio = ca->journal.bio;
 			bio_reset(bio);
 			bio->bi_bdev = ca->disk_sb.bdev;
@@ -2375,7 +2357,7 @@ static void journal_write(struct closure *cl)
 	}
 
 no_io:
-	extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
+	extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr)
 		ptr->offset += sectors;
 
 	continue_at(cl, journal_write_done, system_highpri_wq);
@@ -2779,163 +2761,32 @@ int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
 	return ret;
 }
 
-ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
-{
-	struct bch_fs *c = container_of(j, struct bch_fs, journal);
-	union journal_res_state *s = &j->reservations;
-	struct bch_dev *ca;
-	unsigned iter;
-	ssize_t ret = 0;
-
-	rcu_read_lock();
-	spin_lock(&j->lock);
-
-	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-			 "active journal entries:\t%zu\n"
-			 "seq:\t\t\t%llu\n"
-			 "last_seq:\t\t%llu\n"
-			 "last_seq_ondisk:\t%llu\n"
-			 "reservation count:\t%u\n"
-			 "reservation offset:\t%u\n"
-			 "current entry u64s:\t%u\n"
-			 "io in flight:\t\t%i\n"
-			 "need write:\t\t%i\n"
-			 "dirty:\t\t\t%i\n"
-			 "replay done:\t\t%i\n",
-			 fifo_used(&j->pin),
-			 (u64) atomic64_read(&j->seq),
-			 last_seq(j),
-			 j->last_seq_ondisk,
-			 journal_state_count(*s, s->idx),
-			 s->cur_entry_offset,
-			 j->cur_entry_u64s,
-			 s->prev_buf_unwritten,
-			 test_bit(JOURNAL_NEED_WRITE, &j->flags),
-			 journal_entry_is_open(j),
-			 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
-
-	for_each_member_device_rcu(ca, c, iter,
-				   &c->rw_devs[BCH_DATA_JOURNAL]) {
-		struct journal_device *ja = &ca->journal;
-
-		if (!ja->nr)
-			continue;
-
-		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-				 "dev %u:\n"
-				 "\tnr\t\t%u\n"
-				 "\tcur_idx\t\t%u (seq %llu)\n"
-				 "\tlast_idx\t%u (seq %llu)\n",
-				 iter, ja->nr,
-				 ja->cur_idx, ja->bucket_seq[ja->cur_idx],
-				 ja->last_idx, ja->bucket_seq[ja->last_idx]);
-	}
-
-	spin_unlock(&j->lock);
-	rcu_read_unlock();
-
-	return ret;
-}
-
-ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
-{
-	struct journal_entry_pin_list *pin_list;
-	struct journal_entry_pin *pin;
-	ssize_t ret = 0;
-	unsigned i;
-
-	spin_lock_irq(&j->pin_lock);
-	fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
-		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-				 "%llu: count %u\n",
-				 journal_pin_seq(j, pin_list),
-				 atomic_read(&pin_list->count));
-
-		list_for_each_entry(pin, &pin_list->list, list)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "\t%p %pf\n",
-					 pin, pin->flush);
-
-		if (!list_empty(&pin_list->flushed))
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "flushed:\n");
-
-		list_for_each_entry(pin, &pin_list->flushed, list)
-			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
-					 "\t%p %pf\n",
-					 pin, pin->flush);
-	}
-	spin_unlock_irq(&j->pin_lock);
-
-	return ret;
-}
+/* startup/shutdown: */
 
-static bool bch2_journal_writing_to_device(struct bch_dev *ca)
+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
 {
-	struct journal *j = &ca->fs->journal;
+	union journal_res_state state;
+	struct journal_buf *w;
 	bool ret;
 
 	spin_lock(&j->lock);
-	ret = bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key),
-				     ca->dev_idx);
+	state = READ_ONCE(j->reservations);
+	w = j->buf + !state.idx;
+
+	ret = state.prev_buf_unwritten &&
+		bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), dev_idx);
 	spin_unlock(&j->lock);
 
 	return ret;
 }
 
-/*
- * This asumes that ca has already been marked read-only so that
- * journal_next_bucket won't pick buckets out of ca any more.
- * Hence, if the journal is not currently pointing to ca, there
- * will be no new writes to journal entries in ca after all the
- * pending ones have been flushed to disk.
- *
- * If the journal is being written to ca, write a new record, and
- * journal_next_bucket will notice that the device is no longer
- * writeable and pick a new set of devices to write to.
- */
-
-int bch2_journal_move(struct bch_dev *ca)
+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
 {
-	struct journal_device *ja = &ca->journal;
-	struct journal *j = &ca->fs->journal;
-	u64 seq_to_flush = 0;
-	unsigned i;
-	int ret;
-
-	if (bch2_journal_writing_to_device(ca)) {
-		/*
-		 * bch_journal_meta will write a record and we'll wait
-		 * for the write to complete.
-		 * Actually writing the journal (journal_write_locked)
-		 * will call journal_next_bucket which notices that the
-		 * device is no longer writeable, and picks a new one.
-		 */
-		bch2_journal_meta(j);
-		BUG_ON(bch2_journal_writing_to_device(ca));
-	}
-
-	for (i = 0; i < ja->nr; i++)
-		seq_to_flush = max(seq_to_flush, ja->bucket_seq[i]);
-
-	bch2_journal_flush_pins(j, seq_to_flush);
-
-	/*
-	 * Force a meta-data journal entry to be written so that
-	 * we have newer journal entries in devices other than ca,
-	 * and wait for the meta data write to complete.
-	 */
-	bch2_journal_meta(j);
-
-	/*
-	 * Verify that we no longer need any of the journal entries in
-	 * the device
-	 */
 	spin_lock(&j->lock);
-	ret = j->last_seq_ondisk > seq_to_flush ? 0 : -EIO;
+	bch2_extent_drop_device(bkey_i_to_s_extent(&j->key), ca->dev_idx);
 	spin_unlock(&j->lock);
 
-	return ret;
+	wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
 }
 
 void bch2_fs_journal_stop(struct journal *j)
@@ -3006,6 +2857,7 @@ int bch2_fs_journal_init(struct journal *j)
 
 	spin_lock_init(&j->lock);
 	spin_lock_init(&j->pin_lock);
+	spin_lock_init(&j->err_lock);
 	init_waitqueue_head(&j->wait);
 	INIT_DELAYED_WORK(&j->write_work, journal_write_work);
 	INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
@@ -3035,3 +2887,96 @@ int bch2_fs_journal_init(struct journal *j)
 
 	return 0;
 }
+
+/* debug: */
+
+ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
+{
+	struct bch_fs *c = container_of(j, struct bch_fs, journal);
+	union journal_res_state *s = &j->reservations;
+	struct bch_dev *ca;
+	unsigned iter;
+	ssize_t ret = 0;
+
+	rcu_read_lock();
+	spin_lock(&j->lock);
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+			 "active journal entries:\t%zu\n"
+			 "seq:\t\t\t%llu\n"
+			 "last_seq:\t\t%llu\n"
+			 "last_seq_ondisk:\t%llu\n"
+			 "reservation count:\t%u\n"
+			 "reservation offset:\t%u\n"
+			 "current entry u64s:\t%u\n"
+			 "io in flight:\t\t%i\n"
+			 "need write:\t\t%i\n"
+			 "dirty:\t\t\t%i\n"
+			 "replay done:\t\t%i\n",
+			 fifo_used(&j->pin),
+			 (u64) atomic64_read(&j->seq),
+			 last_seq(j),
+			 j->last_seq_ondisk,
+			 journal_state_count(*s, s->idx),
+			 s->cur_entry_offset,
+			 j->cur_entry_u64s,
+			 s->prev_buf_unwritten,
+			 test_bit(JOURNAL_NEED_WRITE, &j->flags),
+			 journal_entry_is_open(j),
+			 test_bit(JOURNAL_REPLAY_DONE, &j->flags));
+
+	for_each_member_device_rcu(ca, c, iter,
+				   &c->rw_devs[BCH_DATA_JOURNAL]) {
+		struct journal_device *ja = &ca->journal;
+
+		if (!ja->nr)
+			continue;
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 "dev %u:\n"
+				 "\tnr\t\t%u\n"
+				 "\tcur_idx\t\t%u (seq %llu)\n"
+				 "\tlast_idx\t%u (seq %llu)\n",
+				 iter, ja->nr,
+				 ja->cur_idx, ja->bucket_seq[ja->cur_idx],
+				 ja->last_idx, ja->bucket_seq[ja->last_idx]);
+	}
+
+	spin_unlock(&j->lock);
+	rcu_read_unlock();
+
+	return ret;
+}
+
+ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
+{
+	struct journal_entry_pin_list *pin_list;
+	struct journal_entry_pin *pin;
+	ssize_t ret = 0;
+	unsigned i;
+
+	spin_lock_irq(&j->pin_lock);
+	fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+				 "%llu: count %u\n",
+				 journal_pin_seq(j, pin_list),
+				 atomic_read(&pin_list->count));
+
+		list_for_each_entry(pin, &pin_list->list, list)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "\t%p %pf\n",
+					 pin, pin->flush);
+
+		if (!list_empty(&pin_list->flushed))
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "flushed:\n");
+
+		list_for_each_entry(pin, &pin_list->flushed, list)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "\t%p %pf\n",
+					 pin, pin->flush);
+	}
+	spin_unlock_irq(&j->pin_lock);
+
+	return ret;
+}
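
Below is a minimal, standalone C sketch of the error-handling pattern this diff moves to. It is not bcachefs code; every name in it (write_buf, write_alloc, write_endio, write_done, err_lock, devs) is illustrative only. The idea it demonstrates: the set of devices a journal write targets is copied into the write buffer when the write is allocated, each per-device I/O completion drops its own device from that copy under a dedicated error lock on failure, and the final completion simply checks whether any replicas survived, instead of tracking failures in a shared replicas_failed bitmask and routing through a separate error callback.

/*
 * Sketch only: per-write device set, pruned on I/O error, checked at
 * completion.  Assumes POSIX threads for the lock; the real code uses a
 * spinlock and bkey extent pointers instead of a bitmask.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct write_buf {
	pthread_mutex_t	err_lock;	/* stands in for the journal's err_lock */
	unsigned long	devs;		/* bitmask of devices this write targets */
};

static void write_alloc(struct write_buf *w, unsigned long devs)
{
	/* like journal_write_alloc() copying j->key into w->key */
	w->devs = devs;
}

static void write_endio(struct write_buf *w, unsigned dev_idx, bool io_error)
{
	/* like journal_write_endio() dropping the failed device from w->key */
	if (!io_error)
		return;

	pthread_mutex_lock(&w->err_lock);
	w->devs &= ~(1UL << dev_idx);
	pthread_mutex_unlock(&w->err_lock);
}

static int write_done(struct write_buf *w)
{
	/* like journal_write_done() checking bch2_extent_nr_ptrs() */
	if (!w->devs) {
		fprintf(stderr, "unable to write journal to sufficient devices\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	struct write_buf w = { .err_lock = PTHREAD_MUTEX_INITIALIZER };

	write_alloc(&w, (1UL << 0) | (1UL << 2));	/* replicate to devices 0 and 2 */
	write_endio(&w, 0, true);			/* device 0 fails */
	write_endio(&w, 2, false);			/* device 2 succeeds */

	return write_done(&w) ? 1 : 0;			/* one replica survived: success */
}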