summary | refs | log | tree | commit | diff
path: root/libbcachefs/journal.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/journal.c')
-rw-r--r-- libbcachefs/journal.c 329
1 files changed, 137 insertions, 192 deletions
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index ecae9b01..829e0648 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1629,8 +1629,7 @@ static int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
ja->nr++;
spin_unlock(&j->lock);
- bch2_mark_metadata_bucket(c, ca, &ca->buckets[bucket],
- BCH_DATA_JOURNAL,
+ bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB), 0);
@@ -2021,10 +2020,11 @@ static void journal_reclaim_work(struct work_struct *work)
/**
* journal_next_bucket - move on to the next journal bucket if possible
*/
-static int journal_write_alloc(struct journal *j, unsigned sectors)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+ unsigned sectors)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
+ struct bkey_s_extent e;
struct bch_extent_ptr *ptr;
struct journal_device *ja;
struct bch_dev *ca;
@@ -2033,6 +2033,7 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
READ_ONCE(c->opts.metadata_replicas);
spin_lock(&j->lock);
+ e = bkey_i_to_s_extent(&j->key);
/*
* Drop any pointers to devices that have been removed, are no longer
@@ -2098,6 +2099,8 @@ static int journal_write_alloc(struct journal *j, unsigned sectors)
rcu_read_unlock();
j->prev_buf_sectors = 0;
+
+ bkey_copy(&w->key, &j->key);
spin_unlock(&j->lock);
if (replicas < c->opts.metadata_replicas_required)
@@ -2173,13 +2176,26 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
static void journal_write_done(struct closure *cl)
{
struct journal *j = container_of(cl, struct journal, io);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *w = journal_prev_buf(j);
+ struct bkey_s_c_extent e = bkey_i_to_s_c_extent(&w->key);
+
+ if (!bch2_extent_nr_ptrs(e)) {
+ bch_err(c, "unable to write journal to sufficient devices");
+ goto err;
+ }
+ if (bch2_check_mark_super(c, e, BCH_DATA_JOURNAL))
+ goto err;
+out:
__bch2_time_stats_update(j->write_time, j->write_start_time);
spin_lock(&j->lock);
j->last_seq_ondisk = le64_to_cpu(w->data->last_seq);
+ journal_seq_pin(j, le64_to_cpu(w->data->seq))->devs =
+ bch2_extent_devs(bkey_i_to_s_c_extent(&w->key));
+
/*
* Updating last_seq_ondisk may let journal_reclaim_work() discard more
* buckets:
@@ -2202,31 +2218,6 @@ static void journal_write_done(struct closure *cl)
if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
mod_delayed_work(system_freezable_wq, &j->write_work, 0);
spin_unlock(&j->lock);
-}
-
-static void journal_write_error(struct closure *cl)
-{
- struct journal *j = container_of(cl, struct journal, io);
- struct bch_fs *c = container_of(j, struct bch_fs, journal);
- struct bkey_s_extent e = bkey_i_to_s_extent(&j->key);
-
- while (j->replicas_failed) {
- unsigned idx = __fls(j->replicas_failed);
-
- bch2_extent_drop_ptr_idx(e, idx);
- j->replicas_failed ^= 1 << idx;
- }
-
- if (!bch2_extent_nr_ptrs(e.c)) {
- bch_err(c, "unable to write journal to sufficient devices");
- goto err;
- }
-
- if (bch2_check_mark_super(c, e.c, BCH_DATA_JOURNAL))
- goto err;
-
-out:
- journal_write_done(cl);
return;
err:
bch2_fatal_error(c);
@@ -2241,12 +2232,12 @@ static void journal_write_endio(struct bio *bio)
if (bch2_dev_io_err_on(bio->bi_status, ca, "journal write") ||
bch2_meta_write_fault("journal")) {
- /* Was this a flush or an actual journal write? */
- if (ca->journal.ptr_idx != U8_MAX) {
- set_bit(ca->journal.ptr_idx, &j->replicas_failed);
- set_closure_fn(&j->io, journal_write_error,
- system_highpri_wq);
- }
+ struct journal_buf *w = journal_prev_buf(j);
+ unsigned long flags;
+
+ spin_lock_irqsave(&j->err_lock, flags);
+ bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx);
+ spin_unlock_irqrestore(&j->err_lock, flags);
}
closure_put(&j->io);
@@ -2262,7 +2253,7 @@ static void journal_write(struct closure *cl)
struct jset *jset;
struct bio *bio;
struct bch_extent_ptr *ptr;
- unsigned i, sectors, bytes, ptr_idx = 0;
+ unsigned i, sectors, bytes;
journal_buf_realloc(j, w);
jset = w->data;
@@ -2309,20 +2300,13 @@ static void journal_write(struct closure *cl)
bytes = vstruct_bytes(w->data);
memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
- if (journal_write_alloc(j, sectors)) {
+ if (journal_write_alloc(j, w, sectors)) {
bch2_journal_halt(j);
bch_err(c, "Unable to allocate journal write");
bch2_fatal_error(c);
continue_at(cl, journal_write_done, system_highpri_wq);
}
- if (bch2_check_mark_super(c, bkey_i_to_s_c_extent(&j->key),
- BCH_DATA_JOURNAL))
- goto err;
-
- journal_seq_pin(j, le64_to_cpu(jset->seq))->devs =
- bch2_extent_devs(bkey_i_to_s_c_extent(&j->key));
-
/*
* XXX: we really should just disable the entire journal in nochanges
* mode
@@ -2330,7 +2314,7 @@ static void journal_write(struct closure *cl)
if (c->opts.nochanges)
goto no_io;
- extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr) {
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
ca = bch_dev_bkey_exists(c, ptr->dev);
if (!percpu_ref_tryget(&ca->io_ref)) {
/* XXX: fix this */
@@ -2341,7 +2325,6 @@ static void journal_write(struct closure *cl)
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_JOURNAL],
sectors);
- ca->journal.ptr_idx = ptr_idx++;
bio = ca->journal.bio;
bio_reset(bio);
bio->bi_iter.bi_sector = ptr->offset;
@@ -2361,10 +2344,9 @@ static void journal_write(struct closure *cl)
for_each_rw_member(ca, c, i)
if (journal_flushes_device(ca) &&
- !bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key), i)) {
+ !bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), i)) {
percpu_ref_get(&ca->io_ref);
- ca->journal.ptr_idx = U8_MAX;
bio = ca->journal.bio;
bio_reset(bio);
bio->bi_bdev = ca->disk_sb.bdev;
@@ -2375,7 +2357,7 @@ static void journal_write(struct closure *cl)
}
no_io:
- extent_for_each_ptr(bkey_i_to_s_extent(&j->key), ptr)
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr)
ptr->offset += sectors;
continue_at(cl, journal_write_done, system_highpri_wq);
@@ -2779,163 +2761,32 @@ int bch2_journal_flush_device(struct journal *j, unsigned dev_idx)
return ret;
}
-ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
-{
- struct bch_fs *c = container_of(j, struct bch_fs, journal);
- union journal_res_state *s = &j->reservations;
- struct bch_dev *ca;
- unsigned iter;
- ssize_t ret = 0;
-
- rcu_read_lock();
- spin_lock(&j->lock);
-
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "active journal entries:\t%zu\n"
- "seq:\t\t\t%llu\n"
- "last_seq:\t\t%llu\n"
- "last_seq_ondisk:\t%llu\n"
- "reservation count:\t%u\n"
- "reservation offset:\t%u\n"
- "current entry u64s:\t%u\n"
- "io in flight:\t\t%i\n"
- "need write:\t\t%i\n"
- "dirty:\t\t\t%i\n"
- "replay done:\t\t%i\n",
- fifo_used(&j->pin),
- (u64) atomic64_read(&j->seq),
- last_seq(j),
- j->last_seq_ondisk,
- journal_state_count(*s, s->idx),
- s->cur_entry_offset,
- j->cur_entry_u64s,
- s->prev_buf_unwritten,
- test_bit(JOURNAL_NEED_WRITE, &j->flags),
- journal_entry_is_open(j),
- test_bit(JOURNAL_REPLAY_DONE, &j->flags));
-
- for_each_member_device_rcu(ca, c, iter,
- &c->rw_devs[BCH_DATA_JOURNAL]) {
- struct journal_device *ja = &ca->journal;
-
- if (!ja->nr)
- continue;
-
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "dev %u:\n"
- "\tnr\t\t%u\n"
- "\tcur_idx\t\t%u (seq %llu)\n"
- "\tlast_idx\t%u (seq %llu)\n",
- iter, ja->nr,
- ja->cur_idx, ja->bucket_seq[ja->cur_idx],
- ja->last_idx, ja->bucket_seq[ja->last_idx]);
- }
-
- spin_unlock(&j->lock);
- rcu_read_unlock();
-
- return ret;
-}
-
-ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
-{
- struct journal_entry_pin_list *pin_list;
- struct journal_entry_pin *pin;
- ssize_t ret = 0;
- unsigned i;
-
- spin_lock_irq(&j->pin_lock);
- fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "%llu: count %u\n",
- journal_pin_seq(j, pin_list),
- atomic_read(&pin_list->count));
-
- list_for_each_entry(pin, &pin_list->list, list)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "\t%p %pf\n",
- pin, pin->flush);
-
- if (!list_empty(&pin_list->flushed))
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "flushed:\n");
-
- list_for_each_entry(pin, &pin_list->flushed, list)
- ret += scnprintf(buf + ret, PAGE_SIZE - ret,
- "\t%p %pf\n",
- pin, pin->flush);
- }
- spin_unlock_irq(&j->pin_lock);
-
- return ret;
-}
+/* startup/shutdown: */
-static bool bch2_journal_writing_to_device(struct bch_dev *ca)
+static bool bch2_journal_writing_to_device(struct journal *j, unsigned dev_idx)
{
- struct journal *j = &ca->fs->journal;
+ union journal_res_state state;
+ struct journal_buf *w;
bool ret;
spin_lock(&j->lock);
- ret = bch2_extent_has_device(bkey_i_to_s_c_extent(&j->key),
- ca->dev_idx);
+ state = READ_ONCE(j->reservations);
+ w = j->buf + !state.idx;
+
+ ret = state.prev_buf_unwritten &&
+ bch2_extent_has_device(bkey_i_to_s_c_extent(&w->key), dev_idx);
spin_unlock(&j->lock);
return ret;
}
-/*
- * This asumes that ca has already been marked read-only so that
- * journal_next_bucket won't pick buckets out of ca any more.
- * Hence, if the journal is not currently pointing to ca, there
- * will be no new writes to journal entries in ca after all the
- * pending ones have been flushed to disk.
- *
- * If the journal is being written to ca, write a new record, and
- * journal_next_bucket will notice that the device is no longer
- * writeable and pick a new set of devices to write to.
- */
-
-int bch2_journal_move(struct bch_dev *ca)
+void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
{
- struct journal_device *ja = &ca->journal;
- struct journal *j = &ca->fs->journal;
- u64 seq_to_flush = 0;
- unsigned i;
- int ret;
-
- if (bch2_journal_writing_to_device(ca)) {
- /*
- * bch_journal_meta will write a record and we'll wait
- * for the write to complete.
- * Actually writing the journal (journal_write_locked)
- * will call journal_next_bucket which notices that the
- * device is no longer writeable, and picks a new one.
- */
- bch2_journal_meta(j);
- BUG_ON(bch2_journal_writing_to_device(ca));
- }
-
- for (i = 0; i < ja->nr; i++)
- seq_to_flush = max(seq_to_flush, ja->bucket_seq[i]);
-
- bch2_journal_flush_pins(j, seq_to_flush);
-
- /*
- * Force a meta-data journal entry to be written so that
- * we have newer journal entries in devices other than ca,
- * and wait for the meta data write to complete.
- */
- bch2_journal_meta(j);
-
- /*
- * Verify that we no longer need any of the journal entries in
- * the device
- */
spin_lock(&j->lock);
- ret = j->last_seq_ondisk > seq_to_flush ? 0 : -EIO;
+ bch2_extent_drop_device(bkey_i_to_s_extent(&j->key), ca->dev_idx);
spin_unlock(&j->lock);
- return ret;
+ wait_event(j->wait, !bch2_journal_writing_to_device(j, ca->dev_idx));
}
void bch2_fs_journal_stop(struct journal *j)
@@ -3006,6 +2857,7 @@ int bch2_fs_journal_init(struct journal *j)
spin_lock_init(&j->lock);
spin_lock_init(&j->pin_lock);
+ spin_lock_init(&j->err_lock);
init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, journal_reclaim_work);
@@ -3035,3 +2887,96 @@ int bch2_fs_journal_init(struct journal *j)
return 0;
}
+
+/* debug: */
+
+ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ union journal_res_state *s = &j->reservations;
+ struct bch_dev *ca;
+ unsigned iter;
+ ssize_t ret = 0;
+
+ rcu_read_lock();
+ spin_lock(&j->lock);
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "active journal entries:\t%zu\n"
+ "seq:\t\t\t%llu\n"
+ "last_seq:\t\t%llu\n"
+ "last_seq_ondisk:\t%llu\n"
+ "reservation count:\t%u\n"
+ "reservation offset:\t%u\n"
+ "current entry u64s:\t%u\n"
+ "io in flight:\t\t%i\n"
+ "need write:\t\t%i\n"
+ "dirty:\t\t\t%i\n"
+ "replay done:\t\t%i\n",
+ fifo_used(&j->pin),
+ (u64) atomic64_read(&j->seq),
+ last_seq(j),
+ j->last_seq_ondisk,
+ journal_state_count(*s, s->idx),
+ s->cur_entry_offset,
+ j->cur_entry_u64s,
+ s->prev_buf_unwritten,
+ test_bit(JOURNAL_NEED_WRITE, &j->flags),
+ journal_entry_is_open(j),
+ test_bit(JOURNAL_REPLAY_DONE, &j->flags));
+
+ for_each_member_device_rcu(ca, c, iter,
+ &c->rw_devs[BCH_DATA_JOURNAL]) {
+ struct journal_device *ja = &ca->journal;
+
+ if (!ja->nr)
+ continue;
+
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "dev %u:\n"
+ "\tnr\t\t%u\n"
+ "\tcur_idx\t\t%u (seq %llu)\n"
+ "\tlast_idx\t%u (seq %llu)\n",
+ iter, ja->nr,
+ ja->cur_idx, ja->bucket_seq[ja->cur_idx],
+ ja->last_idx, ja->bucket_seq[ja->last_idx]);
+ }
+
+ spin_unlock(&j->lock);
+ rcu_read_unlock();
+
+ return ret;
+}
+
+ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
+{
+ struct journal_entry_pin_list *pin_list;
+ struct journal_entry_pin *pin;
+ ssize_t ret = 0;
+ unsigned i;
+
+ spin_lock_irq(&j->pin_lock);
+ fifo_for_each_entry_ptr(pin_list, &j->pin, i) {
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "%llu: count %u\n",
+ journal_pin_seq(j, pin_list),
+ atomic_read(&pin_list->count));
+
+ list_for_each_entry(pin, &pin_list->list, list)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "\t%p %pf\n",
+ pin, pin->flush);
+
+ if (!list_empty(&pin_list->flushed))
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "flushed:\n");
+
+ list_for_each_entry(pin, &pin_list->flushed, list)
+ ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+ "\t%p %pf\n",
+ pin, pin->flush);
+ }
+ spin_unlock_irq(&j->pin_lock);
+
+ return ret;
+}