author     Kent Overstreet <kent.overstreet@gmail.com>    2016-09-23 16:20:48 -0800
committer  Kent Overstreet <kent.overstreet@gmail.com>    2016-09-23 16:20:48 -0800
commit     7a8083b4fd1e6623ae343edcf40f5f2230392ee4
tree       5a146f9d2ba669aa8cc1832910d7b70bda48a7db
parent     40bbc456dbc1c5b08d53a82515441b0695ef83d8
bcache: add flag to superblock indicating if we shut down cleanly
-rw-r--r--  drivers/md/bcache/fs-gc.c    | 47
-rw-r--r--  drivers/md/bcache/super.c    | 47
-rw-r--r--  include/uapi/linux/bcache.h  |  6
3 files changed, 74 insertions(+), 26 deletions(-)
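
The change in outline: a new CACHE_SET_CLEAN bit in the superblock is cleared when
the cache set starts running and set again on a clean read-only transition (and only
if the journal saw no errors), so fs-gc can treat any repair it has to make on a
supposedly clean filesystem as an inconsistency. A minimal userspace sketch of that
lifecycle, with a stand-in superblock type and hypothetical helper names (the real
accessors come from the LE64_BITMASK() declaration in bcache.h):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct cache_sb { uint64_t flags2; };   /* stand-in for the real superblock */

    /* Hypothetical accessors over bit 6 of flags2, mirroring what the
     * LE64_BITMASK(CACHE_SET_CLEAN, ...) declaration below generates: */
    static bool cache_set_clean(const struct cache_sb *sb)
    {
            return (sb->flags2 >> 6) & 1;
    }

    static void set_cache_set_clean(struct cache_sb *sb, bool v)
    {
            sb->flags2 = (sb->flags2 & ~(1ULL << 6)) | ((uint64_t) v << 6);
    }

    int main(void)
    {
            struct cache_sb sb = { .flags2 = 1ULL << 6 }; /* last shutdown was clean */

            printf("mounting, last shutdown clean: %d\n", cache_set_clean(&sb));

            set_cache_set_clean(&sb, false);  /* run_cache_set(): now dirty */
            /* ... filesystem runs; on a clean read-only transition with no
             * journal error: ... */
            set_cache_set_clean(&sb, true);

            printf("shut down, clean: %d\n", cache_set_clean(&sb));
            return 0;
    }
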
diff --git a/drivers/md/bcache/fs-gc.c b/drivers/md/bcache/fs-gc.c
index 643143629495..e68d35026051 100644
--- a/drivers/md/bcache/fs-gc.c
+++ b/drivers/md/bcache/fs-gc.c
@@ -105,7 +105,6 @@ s64 bch_count_inode_sectors(struct cache_set *c, u64 inum)
static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
struct bkey_s_c_inode inode, struct nlink link)
{
- struct bkey_i_inode update;
u16 i_mode = le16_to_cpu(inode.v->i_mode);
u32 i_flags = le32_to_cpu(inode.v->i_flags);
u32 i_nlink = le32_to_cpu(inode.v->i_nlink);
@@ -119,9 +118,15 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
mode_to_type(i_mode));
if (!link.count) {
+ cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+ "filesystem marked clean, "
+ "but found orphaned inode %llu",
+ inode.k->p.inode);
+
cache_set_inconsistent_on(S_ISDIR(i_mode) &&
bch_empty_dir(c, inode.k->p.inode), c,
- "non empty directory with link count 0,inode nlink %u, dir links found %u",
+ "non empty directory with link count 0, "
+ "inode nlink %u, dir links found %u",
i_nlink, link.dir_count);
bch_verbose(c, "deleting inum %llu", inode.k->p.inode);
@@ -133,6 +138,11 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
}
if (i_flags & BCH_INODE_I_SIZE_DIRTY) {
+ cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+ "filesystem marked clean, "
+ "but inode %llu has i_size dirty",
+ inode.k->p.inode);
+
bch_verbose(c, "truncating inode %llu", inode.k->p.inode);
/*
@@ -144,7 +154,8 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
round_up(i_size, PAGE_SIZE) >> 9,
NULL, NULL);
if (ret) {
- bch_err(c, "error in fs gc: error %i while truncating inode", ret);
+ bch_err(c, "error in fs gc: error %i "
+ "truncating inode", ret);
return ret;
}
@@ -156,23 +167,40 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
}
if (i_flags & BCH_INODE_I_SECTORS_DIRTY) {
+ cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+ "filesystem marked clean, "
+ "but inode %llu has i_sectors dirty",
+ inode.k->p.inode);
+
bch_verbose(c, "recounting sectors for inode %llu", inode.k->p.inode);
i_sectors = bch_count_inode_sectors(c, inode.k->p.inode);
if (i_sectors < 0) {
- bch_err(c, "error in fs gc: error %i recounting inode sectors",
+ bch_err(c, "error in fs gc: error %i "
+ "recounting inode sectors",
(int) i_sectors);
return i_sectors;
}
}
+ if (i_nlink != link.count + link.dir_count) {
+ cache_set_inconsistent_on(CACHE_SET_CLEAN(&c->disk_sb), c,
+ "filesystem marked clean, "
+ "but inode %llu has wrong i_nlink "
+ "(type %u i_nlink %u, should be %u)",
+ inode.k->p.inode,
+ mode_to_type(i_mode), i_nlink,
+ link.count + link.dir_count);
+
+ bch_verbose(c, "setting inum %llu nlinks from %u to %u",
+ inode.k->p.inode, i_nlink,
+ link.count + link.dir_count);
+ }
+
if (i_nlink != link.count + link.dir_count ||
i_flags & BCH_INODE_I_SECTORS_DIRTY ||
i_flags & BCH_INODE_I_SIZE_DIRTY) {
- if (i_nlink != link.count + link.dir_count)
- bch_verbose(c, "setting inum %llu nlinks from %u to %u",
- inode.k->p.inode, i_nlink,
- link.count + link.dir_count);
+ struct bkey_i_inode update;
bkey_reassemble(&update.k_i, inode.s_c);
update.v.i_nlink = cpu_to_le32(link.count + link.dir_count);
@@ -186,7 +214,8 @@ static int bch_gc_do_inode(struct cache_set *c, struct btree_iter *iter,
BTREE_INSERT_NOFAIL,
BTREE_INSERT_ENTRY(iter, &update.k_i));
if (ret && ret != -EINTR)
- bch_err(c, "error in fs gc: error %i while updating inode", ret);
+ bch_err(c, "error in fs gc: error %i "
+ "updating inode", ret);
}
return ret;
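
Each fs-gc repair site above follows the same pattern: the repair itself is normal
after an unclean shutdown, but if the superblock claimed a clean shutdown, needing
the repair at all is worth reporting. A self-contained illustration of that pattern;
inconsistent_on() is a simplified stand-in for cache_set_inconsistent_on(), which in
the real code also marks the cache set inconsistent:

    #include <stdarg.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-in: report only when the condition holds. */
    static void inconsistent_on(bool cond, const char *fmt, ...)
    {
            va_list args;

            if (!cond)
                    return;

            va_start(args, fmt);
            vfprintf(stderr, fmt, args);
            va_end(args);
            fputc('\n', stderr);
    }

    /* The shape of each repair site after this patch: complain if the
     * filesystem was supposedly clean, then repair unconditionally. */
    static void fixup_nlink(bool sb_clean, unsigned long long inum,
                            unsigned i_nlink, unsigned real_nlink)
    {
            if (i_nlink != real_nlink) {
                    inconsistent_on(sb_clean,
                            "filesystem marked clean, but inode %llu has wrong i_nlink (%u, should be %u)",
                            inum, i_nlink, real_nlink);
                    /* ... update the inode with the correct count ... */
            }
    }

    int main(void)
    {
            fixup_nlink(true, 42, 3, 2);    /* clean superblock: reports */
            fixup_nlink(false, 42, 3, 2);   /* after a crash: silent repair */
            return 0;
    }
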
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 77a6f1bc5916..66f06ec0ff1b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -698,6 +698,9 @@ static void __bch_cache_set_read_only(struct cache_set *c)
bch_btree_flush(c);
+ for_each_cache(ca, c, i)
+ bch_cache_allocator_stop(ca);
+
/*
* Write a journal entry after flushing the btree, so we don't end up
* replaying everything we just flushed:
@@ -705,9 +708,6 @@ static void __bch_cache_set_read_only(struct cache_set *c)
if (test_bit(CACHE_SET_INITIAL_GC_DONE, &c->flags))
bch_journal_meta(&c->journal);
- for_each_cache(ca, c, i)
- bch_cache_allocator_stop(ca);
-
cancel_delayed_work_sync(&c->journal.write_work);
}
@@ -732,22 +732,34 @@ static void bch_cache_set_read_only_work(struct work_struct *work)
c->foreground_write_pd.rate.rate = UINT_MAX;
bch_wake_delayed_writes((unsigned long) c);
- /*
- * If we're not doing an emergency shutdown, we want to wait on
- * outstanding writes to complete so they don't see spurious errors due
- * to shutting down the allocator.
- *
- * If we are doing an emergency shutdown, outstanding writes may hang
- * until we shutdown the allocator, so we don't want to wait here:
- */
- wait_event(bch_read_only_wait,
- test_bit(CACHE_SET_EMERGENCY_RO, &c->flags) ||
- test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ if (!test_bit(CACHE_SET_EMERGENCY_RO, &c->flags)) {
+ /*
+ * If we're not doing an emergency shutdown, we want to wait on
+ * outstanding writes to complete so they don't see spurious
+ * errors due to shutting down the allocator:
+ */
+ wait_event(bch_read_only_wait,
+ test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
- __bch_cache_set_read_only(c);
+ __bch_cache_set_read_only(c);
- wait_event(bch_read_only_wait,
- test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ if (!bch_journal_error(&c->journal)) {
+ SET_CACHE_SET_CLEAN(&c->disk_sb, true);
+ bcache_write_super(c);
+ }
+ } else {
+ /*
+ * If we are doing an emergency shutdown outstanding writes may
+ * hang until we shutdown the allocator so we don't want to wait
+ * on outstanding writes before shutting everything down - but
+ * we do need to wait on them before returning and signalling
+ * that going RO is complete:
+ */
+ __bch_cache_set_read_only(c);
+
+ wait_event(bch_read_only_wait,
+ test_bit(CACHE_SET_WRITE_DISABLE_COMPLETE, &c->flags));
+ }
bch_notify_cache_set_read_only(c);
trace_bcache_cache_set_read_only_done(c);
@@ -1423,6 +1435,7 @@ static const char *run_cache_set(struct cache_set *c)
c->disk_mi[ca->sb.nr_this_dev].last_mount = cpu_to_le64(now);
rcu_read_unlock();
+ SET_CACHE_SET_CLEAN(&c->disk_sb, false);
bcache_write_super(c);
err = "dynamic fault";
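
The super.c changes hinge on ordering: the allocator is now stopped before the final
journal entry so that entry really is the last thing in the journal, and the clean
flag is only written if the journal shut down without error. A condensed sketch of
the resulting read-only path, with hypothetical stubs standing in for the real
helpers in bch_cache_set_read_only_work():

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stubs standing in for the real bcache machinery. */
    static void wait_for_outstanding_writes(void) { puts("writes drained"); }
    static void flush_and_go_read_only(void)
    {
            puts("btree flushed, allocator stopped, final journal entry written");
    }
    static bool journal_error(void)          { return false; }
    static void write_clean_superblock(void) { puts("superblock marked clean"); }

    static void read_only_work(bool emergency)
    {
            if (!emergency) {
                    /* Wait for in-flight writes first so they don't see
                     * spurious errors from a stopped allocator: */
                    wait_for_outstanding_writes();
                    flush_and_go_read_only();

                    if (!journal_error())
                            write_clean_superblock();
            } else {
                    /* Emergency: outstanding writes may hang on the
                     * allocator, so shut down first and wait after.
                     * No clean flag: an emergency shutdown isn't clean. */
                    flush_and_go_read_only();
                    wait_for_outstanding_writes();
            }
    }

    int main(void)
    {
            read_only_work(false);
            return 0;
    }
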
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 6006d50418cf..999ed8f4c535 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -790,6 +790,12 @@ LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63);
LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6);
+/*
+ * Did we shut down cleanly? Just a hint, doesn't affect behaviour of
+ * mount/recovery path:
+ */
+LE64_BITMASK(CACHE_SET_CLEAN, struct cache_sb, flags2, 6, 7);
+
/* options: */
/**
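
For context, LE64_BITMASK(name, type, field, offset, end) declares an accessor pair
over bits [offset, end) of a little-endian 64-bit field, which is why the patch can
call CACHE_SET_CLEAN(&c->disk_sb) and SET_CACHE_SET_CLEAN(&c->disk_sb, true) as
functions. Roughly what the expansion looks like for this flag, as a userspace
approximation that omits the kernel's endian helpers (not the exact macro):

    #include <stdint.h>
    #include <stdio.h>

    struct sb_sketch { uint64_t flags2; };  /* really a __le64 in the kernel */

    /* Accessors over bits 6..7 of flags2, as LE64_BITMASK(CACHE_SET_CLEAN,
     * struct cache_sb, flags2, 6, 7) would declare them: */
    static inline uint64_t SB_CLEAN(const struct sb_sketch *sb)
    {
            return (sb->flags2 >> 6) & ~(~0ULL << (7 - 6));
    }

    static inline void SET_SB_CLEAN(struct sb_sketch *sb, uint64_t v)
    {
            sb->flags2 &= ~(~(~0ULL << (7 - 6)) << 6);
            sb->flags2 |= (v & ~(~0ULL << (7 - 6))) << 6;
    }

    int main(void)
    {
            struct sb_sketch sb = { 0 };

            SET_SB_CLEAN(&sb, 1);
            printf("clean = %llu\n", (unsigned long long) SB_CLEAN(&sb));
            return 0;
    }
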