summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .bcachefs_revision | 2
-rw-r--r-- cmd_device.c | 11
-rw-r--r-- cmd_dump.c | 4
-rw-r--r-- cmd_format.c | 8
-rw-r--r-- cmd_fsck.c | 2
-rw-r--r-- cmd_fusemount.c | 3
-rw-r--r-- cmd_key.c | 6
-rw-r--r-- cmd_kill_btree_node.c | 5
-rw-r--r-- cmd_list.c | 10
-rw-r--r-- cmd_list_journal.c | 3
-rw-r--r-- cmd_migrate.c | 23
-rw-r--r-- cmd_option.c | 3
-rw-r--r-- include/linux/slab.h | 56
-rw-r--r-- include/trace/events/bcachefs.h | 36
-rw-r--r-- libbcachefs.h | 6
-rw-r--r-- libbcachefs/alloc_background.c | 203
-rw-r--r-- libbcachefs/alloc_background.h | 16
-rw-r--r-- libbcachefs/alloc_foreground.c | 47
-rw-r--r-- libbcachefs/backpointers.c | 51
-rw-r--r-- libbcachefs/bcachefs.h | 3
-rw-r--r-- libbcachefs/bcachefs_format.h | 3
-rw-r--r-- libbcachefs/bkey_methods.c | 2
-rw-r--r-- libbcachefs/btree_cache.c | 4
-rw-r--r-- libbcachefs/btree_gc.c | 29
-rw-r--r-- libbcachefs/btree_iter.c | 23
-rw-r--r-- libbcachefs/btree_iter.h | 2
-rw-r--r-- libbcachefs/btree_types.h | 18
-rw-r--r-- libbcachefs/btree_update.h | 3
-rw-r--r-- libbcachefs/btree_update_interior.c | 2
-rw-r--r-- libbcachefs/btree_update_leaf.c | 132
-rw-r--r-- libbcachefs/data_update.c | 13
-rw-r--r-- libbcachefs/ec.c | 2
-rw-r--r-- libbcachefs/errcode.h | 17
-rw-r--r-- libbcachefs/inode.c | 2
-rw-r--r-- libbcachefs/io.c | 35
-rw-r--r-- libbcachefs/journal.c | 74
-rw-r--r-- libbcachefs/journal.h | 3
-rw-r--r-- libbcachefs/journal_io.c | 30
-rw-r--r-- libbcachefs/journal_io.h | 2
-rw-r--r-- libbcachefs/journal_types.h | 8
-rw-r--r-- libbcachefs/lru.c | 118
-rw-r--r-- libbcachefs/lru.h | 26
-rw-r--r-- libbcachefs/move.c | 21
-rw-r--r-- libbcachefs/movinggc.c | 2
-rw-r--r-- libbcachefs/nocow_locking.c | 114
-rw-r--r-- libbcachefs/nocow_locking.h | 50
-rw-r--r-- libbcachefs/nocow_locking_types.h | 20
-rw-r--r-- libbcachefs/recovery.c | 76
-rw-r--r-- libbcachefs/reflink.c | 2
-rw-r--r-- libbcachefs/super-io.c | 9
-rw-r--r-- libbcachefs/super.c | 2
-rw-r--r-- libbcachefs/sysfs.c | 6
-rw-r--r-- linux/shrinker.c | 29
53 files changed, 852 insertions, 525 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 0ba18448..615d94b8 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-de3b30303e8a52dcbf738065efb4cf183fdbf1c1
+0939e1c73231c779c961e1143e1ba489ef2b168c
diff --git a/cmd_device.c b/cmd_device.c
index e3c5d513..c59d3709 100644
--- a/cmd_device.c
+++ b/cmd_device.c
@@ -14,6 +14,7 @@
#include "libbcachefs/bcachefs.h"
#include "libbcachefs/bcachefs_ioctl.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/journal.h"
#include "libbcachefs/super-io.h"
#include "cmds.h"
@@ -410,7 +411,7 @@ int cmd_device_set_state(int argc, char *argv[])
int ret = bch2_read_super(dev_str, &opts, &sb);
if (ret)
- die("error opening %s: %s", dev_str, strerror(-ret));
+ die("error opening %s: %s", dev_str, bch2_err_str(ret));
struct bch_member *m = bch2_sb_get_members(sb.sb)->members + sb.sb->dev_idx;
@@ -527,7 +528,7 @@ int cmd_device_resize(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
if (IS_ERR(c))
- die("error opening %s: %s", dev, strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
struct bch_dev *ca, *resize = NULL;
unsigned i;
@@ -547,7 +548,7 @@ int cmd_device_resize(int argc, char *argv[])
printf("resizing %s to %llu buckets\n", dev, nbuckets);
int ret = bch2_dev_resize(c, resize, nbuckets);
if (ret)
- fprintf(stderr, "resize error: %s\n", strerror(-ret));
+ fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref);
bch2_fs_stop(c);
@@ -630,7 +631,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty());
if (IS_ERR(c))
- die("error opening %s: %s", dev, strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c)));
struct bch_dev *ca, *resize = NULL;
unsigned i;
@@ -647,7 +648,7 @@ int cmd_device_resize_journal(int argc, char *argv[])
printf("resizing journal on %s to %llu buckets\n", dev, nbuckets);
int ret = bch2_set_nr_journal_buckets(c, resize, nbuckets);
if (ret)
- fprintf(stderr, "resize error: %s\n", strerror(-ret));
+ fprintf(stderr, "resize error: %s\n", bch2_err_str(ret));
percpu_ref_put(&resize->io_ref);
bch2_fs_stop(c);
diff --git a/cmd_dump.c b/cmd_dump.c
index 4e3d721f..76b44c58 100644
--- a/cmd_dump.c
+++ b/cmd_dump.c
@@ -82,7 +82,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd,
}
if (ret)
- die("error %s walking btree nodes", strerror(-ret));
+ die("error %s walking btree nodes", bch2_err_str(ret));
b = c->btree_roots[i].b;
if (!btree_node_fake(b)) {
@@ -147,7 +147,7 @@ int cmd_dump(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, opts);
if (IS_ERR(c))
- die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
down_read(&c->gc_lock);
diff --git a/cmd_format.c b/cmd_format.c
index 4debc285..26a1cd9f 100644
--- a/cmd_format.c
+++ b/cmd_format.c
@@ -24,6 +24,7 @@
#include "libbcachefs.h"
#include "crypto.h"
#include "libbcachefs/darray.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/opts.h"
#include "libbcachefs/super-io.h"
#include "libbcachefs/util.h"
@@ -218,6 +219,9 @@ int cmd_format(int argc, char *argv[])
break;
}
+ if (opts.version != bcachefs_metadata_version_current)
+ initialize = false;
+
if (!devices.nr)
die("Please supply a device");
@@ -270,7 +274,7 @@ int cmd_format(int argc, char *argv[])
mount_opts);
if (IS_ERR(c))
die("error opening %s: %s", device_paths.data[0],
- strerror(-PTR_ERR(c)));
+ bch2_err_str(PTR_ERR(c)));
bch2_fs_stop(c);
}
@@ -336,7 +340,7 @@ int cmd_show_super(int argc, char *argv[])
struct bch_sb_handle sb;
int ret = bch2_read_super(dev, &opts, &sb);
if (ret)
- die("Error opening %s: %s", dev, strerror(-ret));
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
struct printbuf buf = PRINTBUF;
diff --git a/cmd_fsck.c b/cmd_fsck.c
index 247e2072..cf20fdd8 100644
--- a/cmd_fsck.c
+++ b/cmd_fsck.c
@@ -89,7 +89,7 @@ int cmd_fsck(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, opts);
if (IS_ERR(c)) {
- fprintf(stderr, "error opening %s: %s\n", argv[0], strerror(-PTR_ERR(c)));
+ fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c)));
exit(8);
}
diff --git a/cmd_fusemount.c b/cmd_fusemount.c
index 216094f0..4470f838 100644
--- a/cmd_fusemount.c
+++ b/cmd_fusemount.c
@@ -17,6 +17,7 @@
#include "libbcachefs/btree_iter.h"
#include "libbcachefs/buckets.h"
#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/error.h"
#include "libbcachefs/fs-common.h"
#include "libbcachefs/inode.h"
@@ -1229,7 +1230,7 @@ int cmd_fusemount(int argc, char *argv[])
c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts);
if (IS_ERR(c))
die("error opening %s: %s", ctx.devices_str,
- strerror(-PTR_ERR(c)));
+ bch2_err_str(PTR_ERR(c)));
/* Fuse */
struct fuse_session *se =
diff --git a/cmd_key.c b/cmd_key.c
index 63b0541c..e8c3eeaf 100644
--- a/cmd_key.c
+++ b/cmd_key.c
@@ -55,7 +55,7 @@ int cmd_unlock(int argc, char *argv[])
struct bch_sb_handle sb;
int ret = bch2_read_super(dev, &opts, &sb);
if (ret)
- die("Error opening %s: %s", dev, strerror(-ret));
+ die("Error opening %s: %s", dev, bch2_err_str(ret));
if (!bch2_sb_is_encrypted(sb.sb))
die("%s is not encrypted", dev);
@@ -90,7 +90,7 @@ int cmd_set_passphrase(int argc, char *argv[])
c = bch2_fs_open(argv + 1, argc - 1, opts);
if (IS_ERR(c))
- die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
+ die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
if (!crypt)
@@ -127,7 +127,7 @@ int cmd_remove_passphrase(int argc, char *argv[])
opt_set(opts, nostart, true);
c = bch2_fs_open(argv + 1, argc - 1, opts);
if (IS_ERR(c))
- die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c)));
+ die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c)));
struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb);
if (!crypt)
diff --git a/cmd_kill_btree_node.c b/cmd_kill_btree_node.c
index a0e0fc9b..a8915a1f 100644
--- a/cmd_kill_btree_node.c
+++ b/cmd_kill_btree_node.c
@@ -9,6 +9,7 @@
#include "libbcachefs/bcachefs.h"
#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/error.h"
#include "libbcachefs/super.h"
@@ -60,7 +61,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, opts);
if (IS_ERR(c))
- die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
struct btree_trans trans;
struct btree_iter iter;
@@ -70,7 +71,7 @@ int cmd_kill_btree_node(int argc, char *argv[])
ret = posix_memalign(&zeroes, c->opts.block_size, c->opts.block_size);
if (ret)
- die("error %s from posix_memalign", strerror(ret));
+ die("error %s from posix_memalign", bch2_err_str(ret));
bch2_trans_init(&trans, c, 0, 0);
diff --git a/cmd_list.c b/cmd_list.c
index 382153da..db66af2d 100644
--- a/cmd_list.c
+++ b/cmd_list.c
@@ -67,7 +67,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, unsigne
bch2_trans_iter_exit(&trans, &iter);
if (ret)
- die("error %s walking btree nodes", strerror(-ret));
+ die("error %s walking btree nodes", bch2_err_str(ret));
bch2_trans_exit(&trans);
printbuf_exit(&buf);
@@ -96,7 +96,7 @@ static void list_nodes(struct bch_fs *c, enum btree_id btree_id, unsigned level,
bch2_trans_iter_exit(&trans, &iter);
if (ret)
- die("error %s walking btree nodes", strerror(-ret));
+ die("error %s walking btree nodes", bch2_err_str(ret));
bch2_trans_exit(&trans);
printbuf_exit(&buf);
@@ -232,7 +232,7 @@ static void list_nodes_ondisk(struct bch_fs *c, enum btree_id btree_id, unsigned
bch2_trans_iter_exit(&trans, &iter);
if (ret)
- die("error %s walking btree nodes", strerror(-ret));
+ die("error %s walking btree nodes", bch2_err_str(ret));
bch2_trans_exit(&trans);
printbuf_exit(&buf);
@@ -270,7 +270,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, unsigned l
bch2_trans_iter_exit(&trans, &iter);
if (ret)
- die("error %s walking btree nodes", strerror(-ret));
+ die("error %s walking btree nodes", bch2_err_str(ret));
bch2_trans_exit(&trans);
printbuf_exit(&buf);
@@ -376,7 +376,7 @@ int cmd_list(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, opts);
if (IS_ERR(c))
- die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
for (btree_id = btree_id_start;
diff --git a/cmd_list_journal.c b/cmd_list_journal.c
index 0836ebfc..e89f7de9 100644
--- a/cmd_list_journal.c
+++ b/cmd_list_journal.c
@@ -9,6 +9,7 @@
#include "libbcachefs/bcachefs.h"
#include "libbcachefs/btree_iter.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/error.h"
#include "libbcachefs/journal_io.h"
#include "libbcachefs/journal_seq_blacklist.h"
@@ -75,7 +76,7 @@ int cmd_list_journal(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, opts);
if (IS_ERR(c))
- die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c)));
+ die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c)));
struct journal_replay *p, **_p;
struct genradix_iter iter;
diff --git a/cmd_migrate.c b/cmd_migrate.c
index 707f13e1..5a35c5a1 100644
--- a/cmd_migrate.c
+++ b/cmd_migrate.c
@@ -30,6 +30,7 @@
#include "libbcachefs/btree_update.h"
#include "libbcachefs/buckets.h"
#include "libbcachefs/dirent.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/fs-common.h"
#include "libbcachefs/inode.h"
#include "libbcachefs/io.h"
@@ -127,7 +128,7 @@ static void update_inode(struct bch_fs *c,
ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i,
NULL, NULL, 0);
if (ret)
- die("error updating inode: %s", strerror(-ret));
+ die("error updating inode: %s", bch2_err_str(ret));
}
static void create_link(struct bch_fs *c,
@@ -143,7 +144,7 @@ static void create_link(struct bch_fs *c,
(subvol_inum) { 1, parent->bi_inum }, &parent_u,
(subvol_inum) { 1, inum }, &inode, &qstr));
if (ret)
- die("error creating hardlink: %s", strerror(-ret));
+ die("error creating hardlink: %s", bch2_err_str(ret));
}
static struct bch_inode_unpacked create_file(struct bch_fs *c,
@@ -164,7 +165,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c,
uid, gid, mode, rdev, NULL, NULL,
(subvol_inum) {}, 0));
if (ret)
- die("error creating %s: %s", name, strerror(-ret));
+ die("error creating %s: %s", name, bch2_err_str(ret));
return new_inode;
}
@@ -235,7 +236,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst,
&hash_info, attr,
val, val_size, h->flags, 0));
if (ret < 0)
- die("error creating xattr: %s", strerror(-ret));
+ die("error creating xattr: %s", bch2_err_str(ret));
}
}
@@ -270,7 +271,7 @@ static void write_data(struct bch_fs *c,
int ret = bch2_disk_reservation_get(c, &op.res, len >> 9,
c->opts.data_replicas, 0);
if (ret)
- die("error reserving space in new filesystem: %s", strerror(-ret));
+ die("error reserving space in new filesystem: %s", bch2_err_str(ret));
closure_call(&op.cl, bch2_write, NULL, &cl);
@@ -335,12 +336,12 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst,
BCH_DISK_RESERVATION_NOFAIL);
if (ret)
die("error reserving space in new filesystem: %s",
- strerror(-ret));
+ bch2_err_str(ret));
ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i,
&res, NULL, 0);
if (ret)
- die("btree insert error %s", strerror(-ret));
+ die("btree insert error %s", bch2_err_str(ret));
bch2_disk_reservation_put(c, &res);
@@ -581,7 +582,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path,
int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO },
&root_inode);
if (ret)
- die("error looking up root directory: %s", strerror(-ret));
+ die("error looking up root directory: %s", bch2_err_str(ret));
if (fchdir(src_fd))
die("chdir error: %m");
@@ -706,13 +707,13 @@ static int migrate_fs(const char *fs_path,
c = bch2_fs_open(path, 1, opts);
if (IS_ERR(c))
- die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
+ die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
mark_unreserved_space(c, extents);
int ret = bch2_fs_start(c);
if (ret)
- die("Error starting new filesystem: %s", strerror(-ret));
+ die("Error starting new filesystem: %s", bch2_err_str(ret));
copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents);
@@ -724,7 +725,7 @@ static int migrate_fs(const char *fs_path,
c = bch2_fs_open(path, 1, opts);
if (IS_ERR(c))
- die("Error opening new filesystem: %s", strerror(-PTR_ERR(c)));
+ die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c)));
bch2_fs_stop(c);
printf("fsck complete\n");
diff --git a/cmd_option.c b/cmd_option.c
index 86768e5d..6ce34016 100644
--- a/cmd_option.c
+++ b/cmd_option.c
@@ -20,6 +20,7 @@
#include "cmds.h"
#include "libbcachefs.h"
+#include "libbcachefs/errcode.h"
#include "libbcachefs/opts.h"
#include "libbcachefs/super-io.h"
@@ -64,7 +65,7 @@ int cmd_set_option(int argc, char *argv[])
struct bch_fs *c = bch2_fs_open(argv, argc, open_opts);
if (IS_ERR(c)) {
- fprintf(stderr, "error opening %s: %s\n", argv[0], strerror(-PTR_ERR(c)));
+ fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c)));
exit(EXIT_FAILURE);
}
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 17fe235e..cf48570c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -20,12 +20,10 @@
static inline void *kmalloc(size_t size, gfp_t flags)
{
- unsigned i = 0;
+ unsigned i;
void *p;
- do {
- run_shrinkers(flags, i != 0);
-
+ for (i = 0; i < 10; i++) {
if (size) {
size_t alignment = min(rounddown_pow_of_two(size), (size_t)PAGE_SIZE);
alignment = max(sizeof(void *), alignment);
@@ -34,9 +32,15 @@ static inline void *kmalloc(size_t size, gfp_t flags)
} else {
p = malloc(0);
}
- if (p && (flags & __GFP_ZERO))
- memset(p, 0, size);
- } while (!p && i++ < 10);
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
return p;
}
@@ -93,16 +97,20 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t
static inline struct page *alloc_pages(gfp_t flags, unsigned int order)
{
size_t size = PAGE_SIZE << order;
- unsigned i = 0;
+ unsigned i;
void *p;
- do {
- run_shrinkers(flags, i != 0);
-
+ for (i = 0; i < 10; i++) {
p = aligned_alloc(PAGE_SIZE, size);
- if (p && (flags & __GFP_ZERO))
- memset(p, 0, size);
- } while (!p && i++ < 10);
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
return p;
}
@@ -193,20 +201,24 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size)
#define vfree(p) free(p)
-static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask)
+static inline void *__vmalloc(unsigned long size, gfp_t flags)
{
- unsigned i = 0;
+ unsigned i;
void *p;
size = round_up(size, PAGE_SIZE);
- do {
- run_shrinkers(gfp_mask, i != 0);
-
+ for (i = 0; i < 10; i++) {
p = aligned_alloc(PAGE_SIZE, size);
- if (p && gfp_mask & __GFP_ZERO)
- memset(p, 0, size);
- } while (!p && i++ < 10);
+
+ if (p) {
+ if (flags & __GFP_ZERO)
+ memset(p, 0, size);
+ break;
+ }
+
+ run_shrinkers(flags, true);
+ }
return p;
}
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 47ba750d..f699146a 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -564,6 +564,7 @@ TRACE_EVENT(bucket_alloc_fail,
__field(u64, need_journal_commit )
__field(u64, nouse )
__field(bool, nonblocking )
+ __field(u64, nocow )
__array(char, err, 32 )
),
@@ -579,10 +580,11 @@ TRACE_EVENT(bucket_alloc_fail,
__entry->need_journal_commit = s->skipped_need_journal_commit;
__entry->nouse = s->skipped_nouse;
__entry->nonblocking = nonblocking;
+ __entry->nocow = s->skipped_nocow;
strscpy(__entry->err, err, sizeof(__entry->err));
),
- TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s",
+ TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u nocow %llu err %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->reserve,
__entry->free,
@@ -594,6 +596,7 @@ TRACE_EVENT(bucket_alloc_fail,
__entry->need_journal_commit,
__entry->nouse,
__entry->nonblocking,
+ __entry->nocow,
__entry->err)
);
@@ -702,6 +705,37 @@ TRACE_EVENT(move_data,
__entry->sectors_moved, __entry->keys_moved)
);
+TRACE_EVENT(evacuate_bucket,
+ TP_PROTO(struct bch_fs *c, struct bpos *bucket,
+ unsigned sectors, unsigned bucket_size,
+ int ret),
+ TP_ARGS(c, bucket, sectors, bucket_size, ret),
+
+ TP_STRUCT__entry(
+ __field(dev_t, dev )
+ __field(u64, member )
+ __field(u64, bucket )
+ __field(u32, sectors )
+ __field(u32, bucket_size )
+ __field(int, ret )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = c->dev;
+ __entry->member = bucket->inode;
+ __entry->bucket = bucket->offset;
+ __entry->sectors = sectors;
+ __entry->bucket_size = bucket_size;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->member, __entry->bucket,
+ __entry->sectors, __entry->bucket_size,
+ __entry->ret)
+);
+
TRACE_EVENT(copygc,
TP_PROTO(struct bch_fs *c,
u64 sectors_moved, u64 sectors_not_moved,
diff --git a/libbcachefs.h b/libbcachefs.h
index 17e8eef3..4bb51bd8 100644
--- a/libbcachefs.h
+++ b/libbcachefs.h
@@ -41,8 +41,12 @@ struct format_opts {
static inline struct format_opts format_opts_default()
{
+ unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK)
+ ? read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version")
+ : bcachefs_metadata_version_current;
+
return (struct format_opts) {
- .version = bcachefs_metadata_version_current,
+ .version = version,
.superblock_size = SUPERBLOCK_SIZE_DEFAULT,
};
}
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 39d8d317..a78232ed 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -386,14 +386,16 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
{
struct bch_alloc_v4 _a;
const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
- const struct bch_backpointer *bps;
unsigned i;
prt_newline(out);
printbuf_indent_add(out, 2);
prt_printf(out, "gen %u oldest_gen %u data_type %s",
- a->gen, a->oldest_gen, bch2_data_types[a->data_type]);
+ a->gen, a->oldest_gen,
+ a->data_type < BCH_DATA_NR
+ ? bch2_data_types[a->data_type]
+ : "(invalid data type)");
prt_newline(out);
prt_printf(out, "journal_seq %llu", a->journal_seq);
prt_newline(out);
@@ -413,33 +415,41 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c
prt_newline(out);
prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]);
prt_newline(out);
- prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a));
- printbuf_indent_add(out, 2);
- bps = alloc_v4_backpointers_c(a);
- for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) {
+ if (k.k->type == KEY_TYPE_alloc_v4) {
+ struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k);
+ const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v);
+
+ prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a_raw.v));
prt_newline(out);
- bch2_backpointer_to_text(out, &bps[i]);
+
+ prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v));
+ printbuf_indent_add(out, 2);
+
+ for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) {
+ prt_newline(out);
+ bch2_backpointer_to_text(out, &bps[i]);
+ }
+
+ printbuf_indent_sub(out, 2);
}
- printbuf_indent_sub(out, 4);
+ printbuf_indent_sub(out, 2);
}
void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
{
if (k.k->type == KEY_TYPE_alloc_v4) {
- int d;
+ void *src, *dst;
*out = *bkey_s_c_to_alloc_v4(k).v;
- d = (int) BCH_ALLOC_V4_U64s -
- (int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0);
- if (unlikely(d > 0)) {
- memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out),
- 0,
- d * sizeof(u64));
- SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
- }
+ src = alloc_v4_backpointers(out);
+ SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
+ dst = alloc_v4_backpointers(out);
+
+ if (src < dst)
+ memset(src, 0, dst - src);
} else {
struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
@@ -465,20 +475,20 @@ static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
struct bkey_i_alloc_v4 *ret;
- unsigned bytes = k.k->type == KEY_TYPE_alloc_v4
- ? bkey_bytes(k.k)
- : sizeof(struct bkey_i_alloc_v4);
-
- /*
- * Reserve space for one more backpointer here:
- * Not sketchy at doing it this way, nope...
- */
- ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
- if (IS_ERR(ret))
- return ret;
-
if (k.k->type == KEY_TYPE_alloc_v4) {
- struct bch_backpointer *src, *dst;
+ struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k);
+ unsigned bytes = sizeof(struct bkey_i_alloc_v4) +
+ BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) *
+ sizeof(struct bch_backpointer);
+ void *src, *dst;
+
+ /*
+ * Reserve space for one more backpointer here:
+ * Not sketchy at doing it this way, nope...
+ */
+ ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer));
+ if (IS_ERR(ret))
+ return ret;
bkey_reassemble(&ret->k_i, k);
@@ -488,9 +498,15 @@ __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) *
sizeof(struct bch_backpointer));
- memset(src, 0, dst - src);
+ if (src < dst)
+ memset(src, 0, dst - src);
set_alloc_v4_u64s(ret);
} else {
+ ret = bch2_trans_kmalloc(trans, sizeof(struct bkey_i_alloc_v4) +
+ sizeof(struct bch_backpointer));
+ if (IS_ERR(ret))
+ return ret;
+
bkey_alloc_v4_init(&ret->k_i);
ret->k.p = k.k->p;
bch2_alloc_to_v4(k, &ret->v);
@@ -508,10 +524,8 @@ static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_
*/
struct bkey_i_alloc_v4 *ret =
bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer));
- if (!IS_ERR(ret)) {
+ if (!IS_ERR(ret))
bkey_reassemble(&ret->k_i, k);
- memset((void *) ret + bkey_bytes(k.k), 0, sizeof(struct bch_backpointer));
- }
return ret;
}
@@ -789,6 +803,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans,
goto err;
if (ca->mi.freespace_initialized &&
+ test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) &&
bch2_trans_inconsistent_on(old.k->type != old_type, trans,
"incorrect key when %s %s btree (got %s should be %s)\n"
" for %s",
@@ -900,13 +915,11 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
new_lru = alloc_lru_idx(*new_a);
if (old_lru != new_lru) {
- ret = bch2_lru_change(trans, new->k.p.inode, new->k.p.offset,
- old_lru, &new_lru, old);
+ ret = bch2_lru_change(trans, new->k.p.inode,
+ bucket_to_u64(new->k.p),
+ old_lru, new_lru);
if (ret)
return ret;
-
- if (new_a->data_type == BCH_DATA_cached)
- new_a->io_time[READ] = new_lru;
}
if (old_a->gen != new_a->gen) {
@@ -1244,7 +1257,15 @@ static int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
}
if (need_update) {
- ret = bch2_trans_update(trans, bucket_gens_iter, &g.k_i, 0);
+ struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(g));
+
+ ret = PTR_ERR_OR_ZERO(k);
+ if (ret)
+ goto err;
+
+ memcpy(k, &g, sizeof(g));
+
+ ret = bch2_trans_update(trans, bucket_gens_iter, k, 0);
if (ret)
goto err;
}
@@ -1370,7 +1391,7 @@ static int bch2_check_bucket_gens_key(struct btree_trans *trans,
k = bch2_trans_kmalloc(trans, sizeof(g));
ret = PTR_ERR_OR_ZERO(k);
if (ret)
- return ret;
+ goto out;
memcpy(k, &g, sizeof(g));
ret = bch2_trans_update(trans, iter, k, 0);
@@ -1422,7 +1443,7 @@ int bch2_check_alloc_info(struct bch_fs *c)
&freespace_iter,
&bucket_gens_iter);
if (ret)
- break;
+ goto bkey_err;
} else {
next = k.k->p;
@@ -1488,7 +1509,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
const struct bch_alloc_v4 *a;
struct bkey_s_c alloc_k, k;
struct printbuf buf = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
int ret;
alloc_k = bch2_btree_iter_peek(alloc_iter);
@@ -1505,8 +1525,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
return 0;
bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru,
- POS(alloc_k.k->p.inode, a->io_time[READ]), 0);
-
+ lru_pos(alloc_k.k->p.inode,
+ bucket_to_u64(alloc_k.k->p),
+ a->io_time[READ]), 0);
k = bch2_btree_iter_peek_slot(&lru_iter);
ret = bkey_err(k);
if (ret)
@@ -1517,21 +1538,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
" %s",
(printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
- fsck_err_on(k.k->type != KEY_TYPE_lru ||
- le64_to_cpu(bkey_s_c_to_lru(k).v->idx) != alloc_k.k->p.offset, c,
- "incorrect/missing lru entry\n"
- " %s\n"
+ fsck_err_on(k.k->type != KEY_TYPE_set, c,
+ "missing lru entry\n"
" %s",
(printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
- (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
+ bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
u64 read_time = a->io_time[READ] ?:
atomic64_read(&c->io_clock[READ].now);
ret = bch2_lru_set(trans,
alloc_k.k->p.inode,
- alloc_k.k->p.offset,
- &read_time);
+ bucket_to_u64(alloc_k.k->p),
+ read_time);
if (ret)
goto err;
@@ -1552,7 +1570,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
err:
fsck_err:
bch2_trans_iter_exit(trans, &lru_iter);
- printbuf_exit(&buf2);
printbuf_exit(&buf);
return ret;
}
@@ -1630,21 +1647,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
goto write;
}
- if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans,
- "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
- "%s",
- a->v.journal_seq,
- c->journal.flushed_seq_ondisk,
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = -EIO;
+ if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+ if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ bch2_trans_inconsistent(trans,
+ "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
+ "%s",
+ a->v.journal_seq,
+ c->journal.flushed_seq_ondisk,
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ }
goto out;
}
- if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans,
- "bucket incorrectly set in need_discard btree\n"
- "%s",
- (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
- ret = -EIO;
+ if (a->v.data_type != BCH_DATA_need_discard) {
+ if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ bch2_trans_inconsistent(trans,
+ "bucket incorrectly set in need_discard btree\n"
+ "%s",
+ (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+ ret = -EIO;
+ }
+
goto out;
}
@@ -1732,51 +1756,34 @@ void bch2_do_discards(struct bch_fs *c)
}
static int invalidate_one_bucket(struct btree_trans *trans,
- struct btree_iter *lru_iter, struct bkey_s_c k,
- unsigned dev_idx, s64 *nr_to_invalidate)
+ struct btree_iter *lru_iter,
+ struct bpos bucket,
+ s64 *nr_to_invalidate)
{
struct bch_fs *c = trans->c;
struct btree_iter alloc_iter = { NULL };
struct bkey_i_alloc_v4 *a;
- struct bpos bucket;
struct printbuf buf = PRINTBUF;
unsigned cached_sectors;
int ret = 0;
- if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx)
+ if (*nr_to_invalidate <= 0)
return 1;
- if (k.k->type != KEY_TYPE_lru) {
- prt_printf(&buf, "non lru key in lru btree:\n ");
- bch2_bkey_val_to_text(&buf, c, k);
-
- if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
- bch_err(c, "%s", buf.buf);
- } else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- ret = -EINVAL;
- }
-
- goto out;
- }
-
- bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx));
-
a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
ret = PTR_ERR_OR_ZERO(a);
if (ret)
goto out;
- if (k.k->p.offset != alloc_lru_idx(a->v)) {
+ if (lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) {
prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n ");
- bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
+ bch2_bpos_to_text(&buf, lru_iter->pos);
prt_printf(&buf, "\n ");
- bch2_bkey_val_to_text(&buf, c, k);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
- if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
- bch_err(c, "%s", buf.buf);
- } else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
+ bch_err(c, "%s", buf.buf);
+ if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) {
+ bch2_inconsistent_error(c);
ret = -EINVAL;
}
@@ -1827,9 +1834,13 @@ static void bch2_do_invalidates_work(struct work_struct *work)
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
- ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru,
- POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k,
- invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate));
+ ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru,
+ lru_pos(ca->dev_idx, 0, 0),
+ lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
+ BTREE_ITER_INTENT, k,
+ invalidate_one_bucket(&trans, &iter,
+ u64_to_bucket(k.k->p.offset),
+ &nr_to_invalidate));
if (ret < 0) {
percpu_ref_put(&ca->ref);
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index be48b7d8..a0c3c47b 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos)
pos.offset < ca->mi.nbuckets;
}
+static inline u64 bucket_to_u64(struct bpos bucket)
+{
+ return (bucket.inode << 48) | bucket.offset;
+}
+
+static inline struct bpos u64_to_bucket(u64 bucket)
+{
+ return POS(bucket >> 48, bucket & ~(~0ULL << 48));
+}
+
static inline u8 alloc_gc_gen(struct bch_alloc_v4 a)
{
return a.gen - a.oldest_gen;
@@ -112,8 +122,6 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s
int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int);
-#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
-
int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
@@ -192,7 +200,9 @@ void bch2_do_invalidates(struct bch_fs *);
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
{
- return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a));
+ return (void *) ((u64 *) &a->v +
+ (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?:
+ BCH_ALLOC_V4_U64s_V0));
}
static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a)
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 46f215c8..f1cfb90b 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -28,6 +28,7 @@
#include "io.h"
#include "journal.h"
#include "movinggc.h"
+#include "nocow_locking.h"
#include <linux/math64.h>
#include <linux/rculist.h>
@@ -312,28 +313,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
a = bch2_alloc_to_v4(k, &a_convert);
- if (genbits != (alloc_freespace_genbits(*a) >> 56)) {
- prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
- " freespace key ",
- genbits, alloc_freespace_genbits(*a) >> 56);
+ if (a->data_type != BCH_DATA_free) {
+ if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ ob = NULL;
+ goto err;
+ }
+
+ prt_printf(&buf, "non free bucket in freespace btree\n"
+ " freespace key ");
bch2_bkey_val_to_text(&buf, c, freespace_k);
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO);
goto err;
-
}
- if (a->data_type != BCH_DATA_free) {
- prt_printf(&buf, "non free bucket in freespace btree\n"
- " freespace key ");
+ if (genbits != (alloc_freespace_genbits(*a) >> 56) &&
+ test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n"
+ " freespace key ",
+ genbits, alloc_freespace_genbits(*a) >> 56);
bch2_bkey_val_to_text(&buf, c, freespace_k);
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k);
bch2_trans_inconsistent(trans, "%s", buf.buf);
ob = ERR_PTR(-EIO);
goto err;
+
}
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
@@ -506,8 +513,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL;
- bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
- u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor;
+ bool freespace = READ_ONCE(ca->mi.freespace_initialized);
+ u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor;
u64 avail;
struct bucket_alloc_state s = { .cur_bucket = start };
bool waiting = false;
@@ -546,20 +553,25 @@ again:
if (ob)
return ob;
}
-
- ob = likely(ca->mi.freespace_initialized)
+alloc:
+ ob = likely(freespace)
? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl)
: bch2_bucket_alloc_early(trans, ca, reserve, &s, cl);
if (s.skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
- if (!ob && !freespace_initialized && start) {
+ if (!ob && !freespace && start) {
start = s.cur_bucket = 0;
- goto again;
+ goto alloc;
}
- if (!freespace_initialized)
+ if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) {
+ freespace = false;
+ goto alloc;
+ }
+
+ if (!freespace)
ca->bucket_alloc_trans_early_cursor = s.cur_bucket;
err:
if (!ob)
@@ -1224,12 +1236,9 @@ err:
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) ||
bch2_err_matches(ret, BCH_ERR_freelist_empty))
return cl
- ? -EAGAIN
+ ? -BCH_ERR_bucket_alloc_blocked
: -BCH_ERR_ENOSPC_bucket_alloc;
- if (bch2_err_matches(ret, BCH_ERR_insufficient_devices))
- return -EROFS;
-
return ret;
}
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 7660a254..405823d1 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -242,6 +242,9 @@ btree:
memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) {
struct printbuf buf = PRINTBUF;
+ if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
+ goto err;
+
prt_printf(&buf, "backpointer not found when deleting");
prt_newline(&buf);
printbuf_indent_add(&buf, 2);
@@ -261,12 +264,9 @@ btree:
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
- bch_err(c, "%s", buf.buf);
- } else {
- ret = -EIO;
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- }
+ bch_err(c, "%s", buf.buf);
+ bch2_inconsistent_error(c);
+ ret = -EIO;
printbuf_exit(&buf);
goto err;
}
@@ -283,7 +283,6 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans,
struct bkey_s_c orig_k)
{
struct bch_fs *c = trans->c;
- struct bch_dev *ca;
struct bch_backpointer *bps = alloc_v4_backpointers(&a->v);
unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v);
struct bkey_i_backpointer *bp_k;
@@ -317,11 +316,10 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans,
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
- bch_err(c, "%s", buf.buf);
- else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- printbuf_exit(&buf);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ bch2_inconsistent_error(c);
return -EIO;
}
}
@@ -334,18 +332,9 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans,
}
/* Overflow: use backpointer btree */
- bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k));
- ret = PTR_ERR_OR_ZERO(bp_k);
- if (ret)
- return ret;
-
- ca = bch_dev_bkey_exists(c, a->k.p.inode);
- bkey_backpointer_init(&bp_k->k_i);
- bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset);
- bp_k->v = bp;
-
- bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p,
+ bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers,
+ bucket_pos_to_bp(c, a->k.p, bp.bucket_offset),
BTREE_ITER_INTENT|
BTREE_ITER_SLOTS|
BTREE_ITER_WITH_UPDATES);
@@ -369,16 +358,22 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans,
prt_printf(&buf, "for ");
bch2_bkey_val_to_text(&buf, c, orig_k);
- if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
- bch_err(c, "%s", buf.buf);
- else {
- bch2_trans_inconsistent(trans, "%s", buf.buf);
- printbuf_exit(&buf);
+ bch_err(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) {
+ bch2_inconsistent_error(c);
ret = -EIO;
goto err;
}
}
+ bp_k = bch2_bkey_alloc(trans, &bp_iter, backpointer);
+ ret = PTR_ERR_OR_ZERO(bp_k);
+ if (ret)
+ goto err;
+
+ bp_k->v = bp;
+
ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
bch2_trans_iter_exit(trans, &bp_iter);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 7f479cdc..febef9ac 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -206,7 +206,7 @@
#include "bcachefs_format.h"
#include "errcode.h"
#include "fifo.h"
-#include "nocow_locking.h"
+#include "nocow_locking_types.h"
#include "opts.h"
#include "util.h"
@@ -549,6 +549,7 @@ enum {
/* fsck passes: */
BCH_FS_TOPOLOGY_REPAIR_DONE,
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */
+ BCH_FS_CHECK_ALLOC_DONE,
BCH_FS_CHECK_LRUS_DONE,
BCH_FS_CHECK_BACKPOINTERS_DONE,
BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE,
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 91a6624e..48438e67 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -1557,7 +1557,8 @@ struct bch_sb_field_journal_seq_blacklist {
x(backpointers, 22) \
x(inode_v3, 23) \
x(unwritten_extents, 24) \
- x(bucket_gens, 25)
+ x(bucket_gens, 25) \
+ x(lru_v2, 26)
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c
index bb74e6f7..e13ce07f 100644
--- a/libbcachefs/bkey_methods.c
+++ b/libbcachefs/bkey_methods.c
@@ -186,7 +186,7 @@ static unsigned bch2_key_types_allowed[] = {
(1U << KEY_TYPE_snapshot),
[BKEY_TYPE_lru] =
(1U << KEY_TYPE_deleted)|
- (1U << KEY_TYPE_lru),
+ (1U << KEY_TYPE_set),
[BKEY_TYPE_freespace] =
(1U << KEY_TYPE_deleted)|
(1U << KEY_TYPE_set),
diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c
index d24827fb..b5e78042 100644
--- a/libbcachefs/btree_cache.c
+++ b/libbcachefs/btree_cache.c
@@ -577,7 +577,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl)
}
trace_and_count(c, btree_cache_cannibalize_lock_fail, c);
- return -EAGAIN;
+ return -BCH_ERR_btree_cache_cannibalize_lock_blocked;
success:
trace_and_count(c, btree_cache_cannibalize_lock, c);
@@ -952,8 +952,6 @@ retry:
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
- * If IO is necessary and running under generic_make_request, returns -EAGAIN.
- *
* The btree node will have either a read or a write lock held, depending on
* the @write parameter.
*/
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 055987a2..6b7353c9 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -1285,8 +1285,7 @@ fsck_err:
return ret;
}
-static int bch2_gc_start(struct bch_fs *c,
- bool metadata_only)
+static int bch2_gc_start(struct bch_fs *c)
{
struct bch_dev *ca = NULL;
unsigned i;
@@ -1301,7 +1300,6 @@ static int bch2_gc_start(struct bch_fs *c,
}
for_each_member_device(ca, c, i) {
- BUG_ON(ca->buckets_gc);
BUG_ON(ca->usage_gc);
ca->usage_gc = alloc_percpu(struct bch_dev_usage);
@@ -1318,6 +1316,22 @@ static int bch2_gc_start(struct bch_fs *c,
return 0;
}
+static int bch2_gc_reset(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ unsigned i;
+
+ for_each_member_device(ca, c, i) {
+ free_percpu(ca->usage_gc);
+ ca->usage_gc = NULL;
+ }
+
+ free_percpu(c->usage_gc);
+ c->usage_gc = NULL;
+
+ return bch2_gc_start(c);
+}
+
/* returns true if not equal */
static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l,
struct bch_alloc_v4 r)
@@ -1763,7 +1777,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
bch2_btree_interior_updates_flush(c);
- ret = bch2_gc_start(c, metadata_only) ?:
+ ret = bch2_gc_start(c) ?:
bch2_gc_alloc_start(c, metadata_only) ?:
bch2_gc_reflink_start(c, metadata_only);
if (ret)
@@ -1824,6 +1838,9 @@ again:
bch2_gc_stripes_reset(c, metadata_only);
bch2_gc_alloc_reset(c, metadata_only);
bch2_gc_reflink_reset(c, metadata_only);
+ ret = bch2_gc_reset(c);
+ if (ret)
+ goto out;
/* flush fsck errors, reset counters */
bch2_flush_fsck_errs(c);
@@ -1975,7 +1992,7 @@ int bch2_gc_gens(struct bch_fs *c)
NULL, NULL,
BTREE_INSERT_NOFAIL,
gc_btree_gens_key(&trans, &iter, k));
- if (ret && ret != -EROFS)
+ if (ret && !bch2_err_matches(ret, EROFS))
bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret));
if (ret)
goto err;
@@ -1988,7 +2005,7 @@ int bch2_gc_gens(struct bch_fs *c)
NULL, NULL,
BTREE_INSERT_NOFAIL,
bch2_alloc_write_oldest_gen(&trans, &iter, k));
- if (ret && ret != -EROFS)
+ if (ret && !bch2_err_matches(ret, EROFS))
bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret));
if (ret)
goto err;
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index f9ccc216..9c139a7b 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -2077,6 +2077,11 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
iter->update_path, pos,
iter->flags & BTREE_ITER_INTENT,
_THIS_IP_);
+ ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
}
/*
@@ -2776,6 +2781,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
return p;
}
+static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_path *path;
+
+ trans_for_each_path(trans, path)
+ if (path->cached && !btree_node_locked(path, 0))
+ path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset);
+
+ srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
+ trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
+}
+
/**
* bch2_trans_begin() - reset a transaction after a interrupted attempt
* @trans: transaction to reset
@@ -2831,6 +2850,9 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_relock(trans);
}
+ if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ)))
+ bch2_trans_reset_srcu_lock(trans);
+
trans->last_restarted_ip = _RET_IP_;
if (trans->restarted)
bch2_btree_path_traverse_all(trans);
@@ -2920,6 +2942,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
trans->nr_max_paths = s->nr_max_paths;
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
+ trans->srcu_lock_time = jiffies;
mutex_lock(&c->btree_trans_lock);
list_for_each_entry(pos, &c->btree_trans_list, list) {
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index 2f13be60..07c415d5 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -459,7 +459,7 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans,
#define bch2_bkey_alloc(_trans, _iter, _type) \
({ \
- struct bkey_i_##_type *_k = bch2_trans_kmalloc(_trans, sizeof(*_k));\
+ struct bkey_i_##_type *_k = bch2_trans_kmalloc_nomemzero(_trans, sizeof(*_k));\
if (!IS_ERR(_k)) { \
bkey_##_type##_init(&_k->k_i); \
_k->k.p = (_iter)->pos; \
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 07c509aa..af86ba12 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -233,7 +233,7 @@ struct btree_path {
/* btree_iter_copy starts here: */
struct bpos pos;
- enum btree_id btree_id:4;
+ enum btree_id btree_id:5;
bool cached:1;
bool preserve:1;
enum btree_path_uptodate uptodate:2;
@@ -243,7 +243,7 @@ struct btree_path {
*/
bool should_be_locked:1;
unsigned level:3,
- locks_want:4;
+ locks_want:3;
u8 nodes_locked;
struct btree_path_level {
@@ -277,7 +277,7 @@ struct btree_iter {
struct btree_path *update_path;
struct btree_path *key_cache_path;
- enum btree_id btree_id:4;
+ enum btree_id btree_id:8;
unsigned min_depth:3;
unsigned advanced:1;
@@ -421,6 +421,7 @@ struct btree_trans {
enum bch_errcode restarted:16;
u32 restart_count;
unsigned long last_restarted_ip;
+ unsigned long srcu_lock_time;
/*
* For when bch2_trans_update notices we'll be splitting a compressed
@@ -442,7 +443,7 @@ struct btree_trans {
/* update path: */
struct btree_trans_commit_hook *hooks;
- DARRAY(u64) extra_journal_entries;
+ darray_u64 extra_journal_entries;
struct journal_entry_pin *journal_pin;
struct journal_res journal_res;
@@ -702,15 +703,6 @@ struct btree_root {
s8 error;
};
-enum btree_insert_ret {
- BTREE_INSERT_OK,
- /* leaf node needs to be split */
- BTREE_INSERT_BTREE_NODE_FULL,
- BTREE_INSERT_NEED_MARK_REPLICAS,
- BTREE_INSERT_NEED_JOURNAL_RES,
- BTREE_INSERT_NEED_JOURNAL_RECLAIM,
-};
-
enum btree_gc_coalesce_fail_reason {
BTREE_GC_COALESCE_FAIL_RESERVE_GET,
BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC,
diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h
index 1c2e7b2b..7e9f1f17 100644
--- a/libbcachefs/btree_update.h
+++ b/libbcachefs/btree_update.h
@@ -82,7 +82,8 @@ void bch2_trans_commit_hook(struct btree_trans *,
struct btree_trans_commit_hook *);
int __bch2_trans_commit(struct btree_trans *);
-int bch2_trans_log_msg(struct btree_trans *, const char *);
+int bch2_trans_log_msg(struct btree_trans *, const char *, ...);
+int bch2_fs_log_msg(struct bch_fs *, const char *, ...);
/**
* bch2_trans_commit - insert keys at given iterator positions
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index a4476f16..a49e7b6b 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -1162,7 +1162,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
bch2_trans_unlock(trans);
closure_sync(&cl);
- } while (ret == -EAGAIN);
+ } while (bch2_err_matches(ret, BCH_ERR_operation_blocked));
}
if (ret) {
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 154a819b..a2b37dd4 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -316,15 +316,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s,
static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
- struct bch_fs *c = trans->c;
- int ret;
-
- ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
- trans->journal_u64s,
- flags|
- (trans->flags & JOURNAL_WATERMARK_MASK));
-
- return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
+ return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
+ trans->journal_u64s,
+ flags|
+ (trans->flags & JOURNAL_WATERMARK_MASK));
}
#define JSET_ENTRY_LOG_U64s 4
@@ -343,23 +338,20 @@ static void journal_transaction_name(struct btree_trans *trans)
strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64));
}
-static inline enum btree_insert_ret
-btree_key_can_insert(struct btree_trans *trans,
- struct btree *b,
- unsigned u64s)
+static inline int btree_key_can_insert(struct btree_trans *trans,
+ struct btree *b, unsigned u64s)
{
struct bch_fs *c = trans->c;
if (!bch2_btree_node_insert_fits(c, b, u64s))
- return BTREE_INSERT_BTREE_NODE_FULL;
+ return -BCH_ERR_btree_insert_btree_node_full;
- return BTREE_INSERT_OK;
+ return 0;
}
-static enum btree_insert_ret
-btree_key_can_insert_cached(struct btree_trans *trans,
- struct btree_path *path,
- unsigned u64s)
+static int btree_key_can_insert_cached(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned u64s)
{
struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
@@ -372,7 +364,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
bch2_btree_key_cache_must_wait(c) &&
!(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM))
- return BTREE_INSERT_NEED_JOURNAL_RECLAIM;
+ return -BCH_ERR_btree_insert_need_journal_reclaim;
/*
* bch2_varint_decode can read past the end of the buffer by at most 7
@@ -381,7 +373,7 @@ btree_key_can_insert_cached(struct btree_trans *trans,
u64s += 1;
if (u64s <= ck->u64s)
- return BTREE_INSERT_OK;
+ return 0;
new_u64s = roundup_pow_of_two(u64s);
new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS);
@@ -646,21 +638,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
trans->journal_res.seq = c->journal.replay_journal_seq;
}
- if (unlikely(trans->extra_journal_entries.nr)) {
- memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
- trans->extra_journal_entries.data,
- trans->extra_journal_entries.nr);
-
- trans->journal_res.offset += trans->extra_journal_entries.nr;
- trans->journal_res.u64s -= trans->extra_journal_entries.nr;
- }
-
/*
* Not allowed to fail after we've gotten our journal reservation - we
* have to use it:
*/
- if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
+ if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
+ !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
if (bch2_journal_seq_verify)
trans_for_each_update(trans, i)
i->k->k.version.lo = trans->journal_res.seq;
@@ -671,7 +655,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
if (trans->fs_usage_deltas &&
bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas))
- return BTREE_INSERT_NEED_MARK_REPLICAS;
+ return -BCH_ERR_btree_insert_need_mark_replicas;
trans_for_each_update(trans, i)
if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
@@ -686,6 +670,15 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
return ret;
}
+ if (unlikely(trans->extra_journal_entries.nr)) {
+ memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res),
+ trans->extra_journal_entries.data,
+ trans->extra_journal_entries.nr);
+
+ trans->journal_res.offset += trans->extra_journal_entries.nr;
+ trans->journal_res.u64s -= trans->extra_journal_entries.nr;
+ }
+
if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
trans_for_each_update(trans, i) {
struct journal *j = &c->journal;
@@ -844,7 +837,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans,
&trans->journal_preres, trans->journal_preres_u64s,
JOURNAL_RES_GET_NONBLOCK|
(trans->flags & JOURNAL_WATERMARK_MASK));
- if (unlikely(ret == -EAGAIN))
+ if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked))
ret = bch2_trans_journal_preres_get_cold(trans,
trans->journal_preres_u64s, trace_ip);
if (unlikely(ret))
@@ -900,12 +893,12 @@ int bch2_trans_commit_error(struct btree_trans *trans,
struct bch_fs *c = trans->c;
switch (ret) {
- case BTREE_INSERT_BTREE_NODE_FULL:
+ case -BCH_ERR_btree_insert_btree_node_full:
ret = bch2_btree_split_leaf(trans, i->path, trans->flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path);
break;
- case BTREE_INSERT_NEED_MARK_REPLICAS:
+ case -BCH_ERR_btree_insert_need_mark_replicas:
bch2_trans_unlock(trans);
ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas);
@@ -916,7 +909,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (ret)
trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip);
break;
- case BTREE_INSERT_NEED_JOURNAL_RES:
+ case -BCH_ERR_journal_res_get_blocked:
bch2_trans_unlock(trans);
if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) &&
@@ -933,7 +926,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
if (ret)
trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip);
break;
- case BTREE_INSERT_NEED_JOURNAL_RECLAIM:
+ case -BCH_ERR_btree_insert_need_journal_reclaim:
bch2_trans_unlock(trans);
trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip);
@@ -970,7 +963,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans)
if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) ||
test_bit(BCH_FS_STARTED, &c->flags))
- return -EROFS;
+ return -BCH_ERR_erofs_trans_commit;
bch2_trans_unlock(trans);
@@ -1734,18 +1727,25 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
return ret;
}
-int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
+static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args)
{
- unsigned len = strlen(msg);
- unsigned u64s = DIV_ROUND_UP(len, sizeof(u64));
+ struct printbuf buf = PRINTBUF;
struct jset_entry_log *l;
+ unsigned u64s;
int ret;
- ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s));
+ prt_vprintf(&buf, fmt, args);
+ ret = buf.allocation_failure ? -ENOMEM : 0;
if (ret)
- return ret;
+ goto err;
- l = (void *) &darray_top(trans->extra_journal_entries);
+ u64s = DIV_ROUND_UP(buf.pos, sizeof(u64));
+
+ ret = darray_make_room(entries, jset_u64s(u64s));
+ if (ret)
+ goto err;
+
+ l = (void *) &darray_top(*entries);
l->entry.u64s = cpu_to_le16(u64s);
l->entry.btree_id = 0;
l->entry.level = 1;
@@ -1753,10 +1753,44 @@ int bch2_trans_log_msg(struct btree_trans *trans, const char *msg)
l->entry.pad[0] = 0;
l->entry.pad[1] = 0;
l->entry.pad[2] = 0;
- memcpy(l->d, msg, len);
- while (len & 7)
- l->d[len++] = '\0';
+ memcpy(l->d, buf.buf, buf.pos);
+ while (buf.pos & 7)
+ l->d[buf.pos++] = '\0';
+
+ entries->nr += jset_u64s(u64s);
+err:
+ printbuf_exit(&buf);
+ return ret;
+}
+
+int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+
+ if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) {
+ ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args);
+ } else {
+ ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW,
+ __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args));
+ }
+
+ va_end(args);
+
+ return ret;
- trans->extra_journal_entries.nr += jset_u64s(u64s);
- return 0;
}
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 3edd7b77..7ef7bb61 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -11,6 +11,7 @@
#include "io.h"
#include "keylist.h"
#include "move.h"
+#include "nocow_locking.h"
#include "subvolume.h"
#include <trace/events/bcachefs.h>
@@ -349,7 +350,7 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
update->op.nr_replicas,
update->op.alloc_reserve,
0, &cl, &wp);
- if (ret == -EAGAIN) {
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
bch2_trans_unlock(trans);
closure_sync(&cl);
continue;
@@ -459,7 +460,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
? 0
: BCH_DISK_RESERVATION_NOFAIL);
if (ret)
- return ret;
+ goto err;
}
m->op.nr_replicas = m->op.nr_replicas_required =
@@ -471,6 +472,14 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m,
if (bkey_extent_is_unwritten(k))
return -BCH_ERR_unwritten_extent_update;
return 0;
+err:
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
+ bch2_bucket_nocow_unlock(&c->nocow_locks,
+ PTR_BUCKET_POS(c, &p.ptr), 0);
+
+ bch2_bkey_buf_exit(&m->k, c);
+ bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
+ return ret;
}
void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c
index 53f0d820..c234c8d5 100644
--- a/libbcachefs/ec.c
+++ b/libbcachefs/ec.c
@@ -942,7 +942,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
closure_sync(&s->iodone);
if (s->err) {
- if (s->err != -EROFS)
+ if (!bch2_err_matches(s->err, EROFS))
bch_err(c, "error creating stripe: error writing data buckets");
goto err;
}
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 4942c367..62170964 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -20,7 +20,6 @@
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
- x(0, insufficient_devices) \
x(0, transaction_restart) \
x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \
x(BCH_ERR_transaction_restart, transaction_restart_relock) \
@@ -53,6 +52,12 @@
x(BCH_ERR_no_btree_node, no_btree_node_down) \
x(BCH_ERR_no_btree_node, no_btree_node_init) \
x(BCH_ERR_no_btree_node, no_btree_node_cached) \
+ x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \
+ x(0, btree_insert_fail) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \
+ x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \
x(0, backpointer_to_overwritten_btree_node) \
x(0, lock_fail_root_changed) \
x(0, journal_reclaim_would_deadlock) \
@@ -76,6 +81,16 @@
x(EINVAL, device_already_online) \
x(EINVAL, insufficient_devices_to_start) \
x(EINVAL, invalid) \
+ x(EROFS, erofs_trans_commit) \
+ x(EROFS, erofs_no_writes) \
+ x(EROFS, erofs_journal_err) \
+ x(EROFS, erofs_sb_err) \
+ x(EROFS, insufficient_devices) \
+ x(0, operation_blocked) \
+ x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \
+ x(BCH_ERR_operation_blocked, journal_res_get_blocked) \
+ x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \
+ x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \
x(BCH_ERR_invalid, invalid_sb) \
x(BCH_ERR_invalid_sb, invalid_sb_magic) \
x(BCH_ERR_invalid_sb, invalid_sb_version) \
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index 938c7b43..585d16ac 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -269,6 +269,8 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
+ memset(unpacked, 0, sizeof(*unpacked));
+
switch (k.k->type) {
case KEY_TYPE_inode: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index f0fca861..d215973a 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -27,6 +27,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "nocow_locking.h"
#include "rebalance.h"
#include "subvolume.h"
#include "super.h"
@@ -427,7 +428,7 @@ retry:
opts.data_replicas,
opts.data_replicas,
RESERVE_none, 0, &cl, &wp);
- if (ret == -EAGAIN) {
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) {
bch2_trans_unlock(trans);
closure_sync(&cl);
goto retry;
@@ -753,15 +754,17 @@ static void __bch2_write_index(struct bch_write_op *op)
op->written += sectors_start - keylist_sectors(keys);
- if (ret) {
+ if (ret && !bch2_err_matches(ret, EROFS)) {
struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
bch_err_inum_offset_ratelimited(c,
k->k.p.inode, k->k.p.offset << 9,
"write error while doing btree update: %s",
bch2_err_str(ret));
- goto err;
}
+
+ if (ret)
+ goto err;
}
out:
/* If some a bucket wasn't written, we can't erasure code it: */
@@ -1362,13 +1365,16 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size);
}));
- if (ret) {
+ if (ret && !bch2_err_matches(ret, EROFS)) {
struct bkey_i *k = bch2_keylist_front(&op->insert_keys);
bch_err_inum_offset_ratelimited(c,
k->k.p.inode, k->k.p.offset << 9,
"write error while doing btree update: %s",
bch2_err_str(ret));
+ }
+
+ if (ret) {
op->error = ret;
break;
}
@@ -1406,7 +1412,7 @@ static void bch2_nocow_write(struct bch_write_op *op)
struct {
struct bpos b;
unsigned gen;
- two_state_lock_t *l;
+ struct nocow_lock_bucket *l;
} buckets[BCH_REPLICAS_MAX];
unsigned nr_buckets = 0;
u32 snapshot;
@@ -1453,7 +1459,8 @@ retry:
buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr);
buckets[nr_buckets].gen = ptr->gen;
buckets[nr_buckets].l =
- bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b);
+ bucket_nocow_lock(&c->nocow_locks,
+ bucket_to_u64(buckets[nr_buckets].b));
prefetch(buckets[nr_buckets].l);
nr_buckets++;
@@ -1475,11 +1482,12 @@ retry:
for (i = 0; i < nr_buckets; i++) {
struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode);
- two_state_lock_t *l = buckets[i].l;
+ struct nocow_lock_bucket *l = buckets[i].l;
bool stale;
- if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE))
- __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE);
+ __bch2_bucket_nocow_lock(&c->nocow_locks, l,
+ bucket_to_u64(buckets[i].b),
+ BUCKET_NOCOW_LOCK_UPDATE);
rcu_read_lock();
stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen);
@@ -1627,7 +1635,7 @@ again:
BCH_WRITE_ONLY_SPECIFIED_DEVS))
? NULL : &op->cl, &wp));
if (unlikely(ret)) {
- if (ret == -EAGAIN)
+ if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
break;
goto err;
@@ -1775,7 +1783,7 @@ void bch2_write(struct closure *cl)
if (c->opts.nochanges ||
!percpu_ref_tryget_live(&c->writes)) {
- op->error = -EROFS;
+ op->error = -BCH_ERR_erofs_no_writes;
goto err;
}
@@ -2905,11 +2913,6 @@ void bch2_fs_io_exit(struct bch_fs *c)
int bch2_fs_io_init(struct bch_fs *c)
{
- unsigned i;
-
- for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
- two_state_lock_init(&c->nocow_locks.l[i]);
-
if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
BIOSET_NEED_BVECS) ||
bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 95c29229..1cbca187 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -199,12 +199,6 @@ static bool journal_entry_close(struct journal *j)
/*
* should _only_ called from journal_res_get() - when we actually want a
* journal reservation - journal entry is open means journal is dirty:
- *
- * returns:
- * 0: success
- * -ENOSPC: journal currently full, must invoke reclaim
- * -EAGAIN: journal blocked, must wait
- * -EROFS: insufficient rw devices or journal error
*/
static int journal_entry_open(struct journal *j)
{
@@ -250,7 +244,7 @@ static int journal_entry_open(struct journal *j)
journal_entry_overhead(j);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
- if (u64s <= 0)
+ if (u64s <= (ssize_t) j->early_journal_entries.nr)
return JOURNAL_ERR_journal_full;
if (fifo_empty(&j->pin) && j->reclaim_thread)
@@ -275,6 +269,12 @@ static int journal_entry_open(struct journal *j)
buf->data->seq = cpu_to_le64(journal_cur_seq(j));
buf->data->u64s = 0;
+ if (j->early_journal_entries.nr) {
+ memcpy(buf->data->_data, j->early_journal_entries.data,
+ j->early_journal_entries.nr * sizeof(u64));
+ le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr);
+ }
+
/*
* Must be set before marking the journal entry as open:
*/
@@ -291,7 +291,9 @@ static int journal_entry_open(struct journal *j)
BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
journal_state_inc(&new);
- new.cur_entry_offset = 0;
+
+ /* Handle any already added entries */
+ new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
@@ -304,6 +306,9 @@ static int journal_entry_open(struct journal *j)
&j->write_work,
msecs_to_jiffies(c->opts.journal_flush_delay));
journal_wake(j);
+
+ if (j->early_journal_entries.nr)
+ darray_exit(&j->early_journal_entries);
return 0;
}
@@ -353,7 +358,7 @@ retry:
return 0;
if (bch2_journal_error(j))
- return -EROFS;
+ return -BCH_ERR_erofs_journal_err;
spin_lock(&j->lock);
@@ -445,7 +450,9 @@ unlock:
}
}
- return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
+ return ret == JOURNAL_ERR_insufficient_devices
+ ? -EROFS
+ : -BCH_ERR_journal_res_get_blocked;
}
/*
@@ -464,7 +471,8 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
int ret;
closure_wait_event(&j->async_wait,
- (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+ (ret = __journal_res_get(j, res, flags)) !=
+ -BCH_ERR_journal_res_get_blocked||
(flags & JOURNAL_RES_GET_NONBLOCK));
return ret;
}
@@ -720,39 +728,6 @@ int bch2_journal_meta(struct journal *j)
return bch2_journal_flush_seq(j, res.seq);
}
-int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
-{
- struct jset_entry_log *entry;
- struct journal_res res = { 0 };
- unsigned msglen, u64s;
- va_list args;
- int ret;
-
- va_start(args, fmt);
- msglen = vsnprintf(NULL, 0, fmt, args) + 1;
- va_end(args);
-
- u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
-
- ret = bch2_journal_res_get(j, &res, u64s, 0);
- if (ret)
- return ret;
-
- entry = container_of(journal_res_entry(j, &res),
- struct jset_entry_log, entry);
- memset(entry, 0, u64s * sizeof(u64));
- entry->entry.type = BCH_JSET_ENTRY_log;
- entry->entry.u64s = u64s - 1;
-
- va_start(args, fmt);
- vsnprintf(entry->d, INT_MAX, fmt, args);
- va_end(args);
-
- bch2_journal_res_put(j, &res);
-
- return bch2_journal_flush_seq(j, res.seq);
-}
-
/* block/unlock the journal: */
void bch2_journal_unblock(struct journal *j)
@@ -815,12 +790,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
} else {
ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none,
false, cl);
- if (IS_ERR(ob[nr_got])) {
- ret = cl
- ? -EAGAIN
- : -BCH_ERR_ENOSPC_bucket_alloc;
+ ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+ if (ret)
break;
- }
bu[nr_got] = ob[nr_got]->bucket;
}
@@ -930,7 +902,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
closure_init_stack(&cl);
- while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) {
+ while (ja->nr != nr && (ret == 0 || ret == -BCH_ERR_bucket_alloc_blocked)) {
struct disk_reservation disk_res = { 0, 0 };
closure_sync(&cl);
@@ -1198,6 +1170,8 @@ void bch2_fs_journal_exit(struct journal *j)
{
unsigned i;
+ darray_exit(&j->early_journal_entries);
+
for (i = 0; i < ARRAY_SIZE(j->buf); i++)
kvpfree(j->buf[i].data, j->buf[i].buf_size);
free_fifo(&j->pin);
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 51d29a01..ee37f90a 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -479,7 +479,7 @@ static inline int bch2_journal_preres_get(struct journal *j,
return 0;
if (flags & JOURNAL_RES_GET_NONBLOCK)
- return -EAGAIN;
+ return -BCH_ERR_journal_preres_get_blocked;
return __bch2_journal_preres_get(j, res, new_u64s, flags);
}
@@ -497,7 +497,6 @@ int bch2_journal_flush_seq(struct journal *, u64);
int bch2_journal_flush(struct journal *);
bool bch2_journal_noflush_seq(struct journal *, u64);
int bch2_journal_meta(struct journal *);
-int bch2_journal_log_msg(struct journal *, const char *, ...);
void bch2_journal_halt(struct journal *);
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 2b1974a9..d6f25934 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1080,7 +1080,10 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
}
}
-int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c,
+ u64 *last_seq,
+ u64 *blacklist_seq,
+ u64 *start_seq)
{
struct journal_list jlist;
struct journal_replay *i, **_i, *prev = NULL;
@@ -1089,7 +1092,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
unsigned iter;
struct printbuf buf = PRINTBUF;
bool degraded = false, last_write_torn = false;
- u64 seq, last_seq = 0;
+ u64 seq;
int ret = 0;
closure_init_stack(&jlist.cl);
@@ -1118,15 +1121,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
if (jlist.ret)
return jlist.ret;
+ *last_seq = 0;
*start_seq = 0;
*blacklist_seq = 0;
/*
* Find most recent flush entry, and ignore newer non flush entries -
* those entries will be blacklisted:
- *
- *
- * XXX check for torn write on last journal entry
*/
genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
int write = READ;
@@ -1140,13 +1141,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
*blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
if (JSET_NO_FLUSH(&i->j)) {
- journal_replay_free(c, i);
+ i->ignore = true;
continue;
}
if (!last_write_torn && !i->csum_good) {
last_write_torn = true;
- journal_replay_free(c, i);
+ i->ignore = true;
continue;
}
@@ -1157,7 +1158,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
le64_to_cpu(i->j.seq)))
i->j.last_seq = i->j.seq;
- last_seq = le64_to_cpu(i->j.last_seq);
+ *last_seq = le64_to_cpu(i->j.last_seq);
*blacklist_seq = le64_to_cpu(i->j.seq) + 1;
break;
}
@@ -1167,13 +1168,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
return 0;
}
- if (!last_seq) {
+ if (!*last_seq) {
fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
return 0;
}
bch_info(c, "journal read done, replaying entries %llu-%llu",
- last_seq, *blacklist_seq - 1);
+ *last_seq, *blacklist_seq - 1);
if (*start_seq != *blacklist_seq)
bch_info(c, "dropped unflushed entries %llu-%llu",
@@ -1187,7 +1188,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
continue;
seq = le64_to_cpu(i->j.seq);
- if (seq < last_seq) {
+ if (seq < *last_seq) {
journal_replay_free(c, i);
continue;
}
@@ -1195,13 +1196,12 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
if (bch2_journal_seq_is_blacklisted(c, seq, true)) {
fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
"found blacklisted journal entry %llu", seq);
-
- journal_replay_free(c, i);
+ i->ignore = true;
}
}
/* Check for missing entries: */
- seq = last_seq;
+ seq = *last_seq;
genradix_for_each(&c->journal_entries, radix_iter, _i) {
i = *_i;
@@ -1239,7 +1239,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
" prev at %s\n"
" next at %s",
missing_start, missing_end,
- last_seq, *blacklist_seq - 1,
+ *last_seq, *blacklist_seq - 1,
buf1.buf, buf2.buf);
printbuf_exit(&buf1);
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index 2f8bbf06..a32c2876 100644
--- a/libbcachefs/journal_io.h
+++ b/libbcachefs/journal_io.h
@@ -52,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct journal_replay *);
-int bch2_journal_read(struct bch_fs *, u64 *, u64 *);
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
void bch2_journal_write(struct closure *);
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index 045ee95a..c8729cb3 100644
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -177,6 +177,8 @@ enum journal_errors {
#undef x
};
+typedef DARRAY(u64) darray_u64;
+
/* Embedded in struct bch_fs */
struct journal {
/* Fastpath stuff up front: */
@@ -204,6 +206,12 @@ struct journal {
unsigned buf_size_want;
/*
+ * We may queue up some things to be journalled (log messages) before
+ * the journal has actually started - stash them here:
+ */
+ darray_u64 early_journal_entries;
+
+ /*
* Two journal entries -- one is currently open for new entries, the
* other is possibly being written out.
*/
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index 380b66c7..e542cd3d 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -8,6 +8,7 @@
#include "lru.h"
#include "recovery.h"
+/* KEY_TYPE_lru is obsolete: */
int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k,
int rw, struct printbuf *err)
{
@@ -30,101 +31,57 @@ void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c,
prt_printf(out, "idx %llu", le64_to_cpu(lru->idx));
}
-int bch2_lru_delete(struct btree_trans *trans, u64 id, u64 idx, u64 time,
- struct bkey_s_c orig_k)
+static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+ u64 dev_bucket, u64 time, unsigned key_type)
{
struct btree_iter iter;
- struct bkey_s_c k;
- u64 existing_idx;
- struct printbuf buf = PRINTBUF;
+ struct bkey_i *k;
int ret = 0;
if (!time)
return 0;
- bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
- POS(id, time),
- BTREE_ITER_INTENT|
- BTREE_ITER_WITH_UPDATES);
- k = bch2_btree_iter_peek_slot(&iter);
- ret = bkey_err(k);
- if (ret)
- goto err;
+ k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k));
+ ret = PTR_ERR_OR_ZERO(k);
+ if (unlikely(ret))
+ return ret;
- if (k.k->type != KEY_TYPE_lru) {
- bch2_bkey_val_to_text(&buf, trans->c, orig_k);
- bch2_trans_inconsistent(trans,
- "pointer to nonexistent lru %llu:%llu\n%s",
- id, time, buf.buf);
- ret = -EIO;
- goto err;
- }
+ bkey_init(&k->k);
+ k->k.type = key_type;
+ k->k.p = lru_pos(lru_id, dev_bucket, time);
- existing_idx = le64_to_cpu(bkey_s_c_to_lru(k).v->idx);
- if (existing_idx != idx) {
- bch2_bkey_val_to_text(&buf, trans->c, orig_k);
- bch2_trans_inconsistent(trans,
- "lru %llu:%llu with wrong backpointer: got %llu, should be %llu\n%s",
- id, time, existing_idx, idx, buf.buf);
- ret = -EIO;
- goto err;
- }
+ EBUG_ON(lru_pos_id(k->k.p) != lru_id);
+ EBUG_ON(lru_pos_time(k->k.p) != time);
+ EBUG_ON(k->k.p.offset != dev_bucket);
- ret = bch2_btree_delete_at(trans, &iter, 0);
-err:
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
+ k->k.p, BTREE_ITER_INTENT);
+
+ ret = bch2_btree_iter_traverse(&iter) ?:
+ bch2_trans_update(trans, &iter, k, 0);
bch2_trans_iter_exit(trans, &iter);
- printbuf_exit(&buf);
return ret;
}
-int bch2_lru_set(struct btree_trans *trans, u64 lru_id, u64 idx, u64 *time)
+int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
{
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bkey_i_lru *lru;
- int ret = 0;
-
- if (!*time)
- return 0;
-
- for_each_btree_key_norestart(trans, iter, BTREE_ID_lru,
- POS(lru_id, *time),
- BTREE_ITER_SLOTS|
- BTREE_ITER_INTENT|
- BTREE_ITER_WITH_UPDATES, k, ret)
- if (bkey_deleted(k.k))
- break;
-
- if (ret)
- goto err;
-
- BUG_ON(iter.pos.inode != lru_id);
- *time = iter.pos.offset;
-
- lru = bch2_bkey_alloc(trans, &iter, lru);
- ret = PTR_ERR_OR_ZERO(lru);
- if (ret)
- goto err;
-
- lru->v.idx = cpu_to_le64(idx);
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+}
- ret = bch2_trans_update(trans, &iter, &lru->k_i, 0);
- if (ret)
- goto err;
-err:
- bch2_trans_iter_exit(trans, &iter);
- return ret;
+int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
+{
+ return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
}
-int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx,
- u64 old_time, u64 *new_time,
- struct bkey_s_c k)
+int bch2_lru_change(struct btree_trans *trans,
+ u16 lru_id, u64 dev_bucket,
+ u64 old_time, u64 new_time)
{
- if (old_time == *new_time)
+ if (old_time == new_time)
return 0;
- return bch2_lru_delete(trans, id, idx, old_time, k) ?:
- bch2_lru_set(trans, id, idx, new_time);
+ return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
+ bch2_lru_set(trans, lru_id, dev_bucket, new_time);
}
static int bch2_check_lru_key(struct btree_trans *trans,
@@ -138,12 +95,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
const struct bch_alloc_v4 *a;
struct printbuf buf1 = PRINTBUF;
struct printbuf buf2 = PRINTBUF;
- struct bpos alloc_pos;
+ struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
int ret;
- alloc_pos = POS(lru_k.k->p.inode,
- le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx));
-
if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c,
"lru key points to nonexistent device:bucket %llu:%llu",
alloc_pos.inode, alloc_pos.offset))
@@ -157,10 +111,12 @@ static int bch2_check_lru_key(struct btree_trans *trans,
a = bch2_alloc_to_v4(k, &a_convert);
- if (fsck_err_on(a->data_type != BCH_DATA_cached ||
- a->io_time[READ] != lru_k.k->p.offset, c,
- "incorrect lru entry %s\n"
+ if (fsck_err_on(lru_k.k->type != KEY_TYPE_set ||
+ a->data_type != BCH_DATA_cached ||
+ a->io_time[READ] != lru_pos_time(lru_k.k->p), c,
+ "incorrect lru entry (time %llu) %s\n"
" for %s",
+ lru_pos_time(lru_k.k->p),
(bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
(bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) {
ret = bch2_btree_delete_at(trans, lru_iter, 0);
diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h
index 925c29b4..2e22f139 100644
--- a/libbcachefs/lru.h
+++ b/libbcachefs/lru.h
@@ -2,6 +2,26 @@
#ifndef _BCACHEFS_LRU_H
#define _BCACHEFS_LRU_H
+#define LRU_TIME_BITS 48
+#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
+
+static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time)
+{
+ EBUG_ON(time > LRU_TIME_MAX);
+
+ return POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket);
+}
+
+static inline u64 lru_pos_id(struct bpos pos)
+{
+ return pos.inode >> LRU_TIME_BITS;
+}
+
+static inline u64 lru_pos_time(struct bpos pos)
+{
+ return pos.inode & ~(~0ULL << LRU_TIME_BITS);
+}
+
int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *);
void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
@@ -10,9 +30,9 @@ void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
.val_to_text = bch2_lru_to_text, \
})
-int bch2_lru_delete(struct btree_trans *, u64, u64, u64, struct bkey_s_c);
-int bch2_lru_set(struct btree_trans *, u64, u64, u64 *);
-int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *, struct bkey_s_c);
+int bch2_lru_del(struct btree_trans *, u16, u64, u64);
+int bch2_lru_set(struct btree_trans *, u16, u64, u64);
+int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
int bch2_check_lrus(struct bch_fs *);
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 47b77b3c..b308354a 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
+#include "alloc_background.h"
#include "alloc_foreground.h"
#include "backpointers.h"
#include "bkey_buf.h"
@@ -257,7 +258,7 @@ static int bch2_move_extent(struct btree_trans *trans,
}
if (!percpu_ref_tryget_live(&c->writes))
- return -EROFS;
+ return -BCH_ERR_erofs_no_writes;
/*
* Before memory allocations & taking nocow locks in
@@ -661,13 +662,29 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
struct btree_iter iter;
struct bkey_buf sk;
struct bch_backpointer bp;
+ struct bch_alloc_v4 a_convert;
+ const struct bch_alloc_v4 *a;
+ struct bkey_s_c k;
struct data_update_opts data_opts;
+ unsigned dirty_sectors, bucket_size;
u64 bp_offset = 0, cur_inum = U64_MAX;
int ret = 0;
bch2_bkey_buf_init(&sk);
bch2_trans_init(&trans, c, 0, 0);
+ bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc,
+ bucket, BTREE_ITER_CACHED);
+ ret = lockrestart_do(&trans,
+ bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ bch2_trans_iter_exit(&trans, &iter);
+
+ if (!ret) {
+ a = bch2_alloc_to_v4(k, &a_convert);
+ dirty_sectors = a->dirty_sectors;
+ bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
+ }
+
while (!(ret = move_ratelimit(&trans, ctxt))) {
bch2_trans_begin(&trans);
@@ -765,6 +782,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
bp_offset++;
}
+ trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret);
+
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) {
bch2_trans_unlock(&trans);
move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads));
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index fbc8043e..f0ab65ff 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -163,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c)
bch2_moving_ctxt_exit(&ctxt);
- if (ret < 0 && ret != -EROFS)
+ if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret));
trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0);
diff --git a/libbcachefs/nocow_locking.c b/libbcachefs/nocow_locking.c
index b325fb10..bff62671 100644
--- a/libbcachefs/nocow_locking.c
+++ b/libbcachefs/nocow_locking.c
@@ -4,12 +4,116 @@
#include "nocow_locking.h"
#include "util.h"
+#include <linux/closure.h>
+
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket && atomic_read(&l->l[i]))
+ return true;
+ return false;
+}
+
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags)
+{
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
+ int lock_val = flags ? 1 : -1;
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket) {
+ if (!atomic_sub_return(lock_val, &l->l[i]))
+ closure_wake_up(&l->wait);
+ return;
+ }
+
+ BUG();
+}
+
+static bool bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l,
+ u64 dev_bucket, int flags)
+{
+ int v, lock_val = flags ? 1 : -1;
+ unsigned i;
+
+ spin_lock(&l->lock);
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (l->b[i] == dev_bucket)
+ goto got_entry;
+
+ for (i = 0; i < ARRAY_SIZE(l->b); i++)
+ if (!atomic_read(&l->l[i])) {
+ l->b[i] = dev_bucket;
+ goto take_lock;
+ }
+fail:
+ spin_unlock(&l->lock);
+ return false;
+got_entry:
+ v = atomic_read(&l->l[i]);
+ if (lock_val > 0 ? v < 0 : v > 0)
+ goto fail;
+take_lock:
+ atomic_add(lock_val, &l->l[i]);
+ spin_unlock(&l->lock);
+ return true;
+}
+
void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
- two_state_lock_t *l, int flags)
+ struct nocow_lock_bucket *l,
+ u64 dev_bucket, int flags)
+{
+ if (!bch2_bucket_nocow_trylock(l, dev_bucket, flags)) {
+ struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
+ u64 start_time = local_clock();
+
+ __closure_wait_event(&l->wait, bch2_bucket_nocow_trylock(l, dev_bucket, flags));
+ bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+ }
+}
+
+void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t)
{
- struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks);
- u64 start_time = local_clock();
+ unsigned i, nr_zero = 0;
+ struct nocow_lock_bucket *l;
+
+ for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) {
+ unsigned v = 0;
+
+ for (i = 0; i < ARRAY_SIZE(l->l); i++)
+ v |= atomic_read(&l->l[i]);
+
+ if (!v) {
+ nr_zero++;
+ continue;
+ }
+
+ if (nr_zero)
+ prt_printf(out, "(%u empty entries)\n", nr_zero);
+ nr_zero = 0;
+
+ for (i = 0; i < ARRAY_SIZE(l->l); i++)
+ if (atomic_read(&l->l[i]))
+ prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i]));
+ prt_newline(out);
+ }
+
+ if (nr_zero)
+ prt_printf(out, "(%u empty entries)\n", nr_zero);
+}
+
+int bch2_fs_nocow_locking_init(struct bch_fs *c)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
+ spin_lock_init(&c->nocow_locks.l[i].lock);
- __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
- bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time);
+ return 0;
}
diff --git a/libbcachefs/nocow_locking.h b/libbcachefs/nocow_locking.h
index 2a7a9f44..45258cc3 100644
--- a/libbcachefs/nocow_locking.h
+++ b/libbcachefs/nocow_locking.h
@@ -2,54 +2,38 @@
#ifndef _BCACHEFS_NOCOW_LOCKING_H
#define _BCACHEFS_NOCOW_LOCKING_H
-#include "bcachefs_format.h"
-#include "two_state_shared_lock.h"
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "nocow_locking_types.h"
#include <linux/hash.h>
-#define BUCKET_NOCOW_LOCKS_BITS 10
-#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS)
-
-struct bucket_nocow_lock_table {
- two_state_lock_t l[BUCKET_NOCOW_LOCKS];
-};
-
-#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
-
-static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
- struct bpos bucket)
+static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t,
+ u64 dev_bucket)
{
- u64 dev_bucket = bucket.inode << 56 | bucket.offset;
unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS);
return t->l + (h & (BUCKET_NOCOW_LOCKS - 1));
}
-static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t,
- struct bpos bucket)
-{
- two_state_lock_t *l = bucket_nocow_lock(t, bucket);
-
- return atomic_long_read(&l->v) != 0;
-}
-
-static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t,
- struct bpos bucket, int flags)
-{
- two_state_lock_t *l = bucket_nocow_lock(t, bucket);
-
- bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE);
-}
+#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0)
-void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int);
+bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos);
+void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int);
+void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *,
+ struct nocow_lock_bucket *, u64, int);
static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t,
struct bpos bucket, int flags)
{
- two_state_lock_t *l = bucket_nocow_lock(t, bucket);
+ u64 dev_bucket = bucket_to_u64(bucket);
+ struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket);
- if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE))
- __bch2_bucket_nocow_lock(t, l, flags);
+ __bch2_bucket_nocow_lock(t, l, dev_bucket, flags);
}
+void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *);
+
+int bch2_fs_nocow_locking_init(struct bch_fs *);
+
#endif /* _BCACHEFS_NOCOW_LOCKING_H */
diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/nocow_locking_types.h
new file mode 100644
index 00000000..bd12bf67
--- /dev/null
+++ b/libbcachefs/nocow_locking_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H
+#define _BCACHEFS_NOCOW_LOCKING_TYPES_H
+
+#define BUCKET_NOCOW_LOCKS_BITS 10
+#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS)
+
+struct nocow_lock_bucket {
+ struct closure_waitlist wait;
+ spinlock_t lock;
+ u64 b[4];
+ atomic_t l[4];
+} __aligned(SMP_CACHE_BYTES);
+
+struct bucket_nocow_lock_table {
+ struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS];
+};
+
+#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */
+
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 4fd8ce55..8df94ad5 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -129,12 +129,12 @@ search:
if (!*idx)
*idx = __bch2_journal_key_search(keys, btree_id, level, pos);
- while (*idx < keys->nr &&
- (k = idx_to_key(keys, *idx),
- k->btree_id == btree_id &&
- k->level == level &&
- bpos_le(k->k->k.p, end_pos))) {
- if (bpos_ge(k->k->k.p, pos) && !k->overwritten)
+ while ((k = *idx < keys->nr ? idx_to_key(keys, *idx) : NULL)) {
+ if (__journal_key_cmp(btree_id, level, end_pos, k) < 0)
+ return NULL;
+
+ if (__journal_key_cmp(btree_id, level, pos, k) <= 0 &&
+ !k->overwritten)
return k->k;
(*idx)++;
@@ -588,7 +588,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
return cmp_int(l->journal_seq, r->journal_seq);
}
-static int bch2_journal_replay(struct bch_fs *c)
+static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq)
{
struct journal_keys *keys = &c->journal_keys;
struct journal_key **keys_sorted, *k;
@@ -610,6 +610,13 @@ static int bch2_journal_replay(struct bch_fs *c)
sizeof(keys_sorted[0]),
journal_sort_seq_cmp, NULL);
+ if (keys->nr) {
+ ret = bch2_fs_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
+ keys->nr, start_seq, end_seq);
+ if (ret)
+ goto err;
+ }
+
for (i = 0; i < keys->nr; i++) {
k = keys_sorted[i];
@@ -625,8 +632,8 @@ static int bch2_journal_replay(struct bch_fs *c)
: 0),
bch2_journal_replay_key(&trans, k));
if (ret) {
- bch_err(c, "journal replay: error %d while replaying key at btree %s level %u",
- ret, bch2_btree_ids[k->btree_id], k->level);
+ bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s",
+ bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret));
goto err;
}
}
@@ -639,7 +646,7 @@ static int bch2_journal_replay(struct bch_fs *c)
ret = bch2_journal_error(j);
if (keys->nr && !ret)
- bch2_journal_log_msg(&c->journal, "journal replay finished");
+ bch2_fs_log_msg(c, "journal replay finished");
err:
kvfree(keys_sorted);
return ret;
@@ -922,6 +929,7 @@ static bool btree_id_is_alloc(enum btree_id id)
case BTREE_ID_backpointers:
case BTREE_ID_need_discard:
case BTREE_ID_freespace:
+ case BTREE_ID_bucket_gens:
return true;
default:
return false;
@@ -1044,7 +1052,7 @@ int bch2_fs_recovery(struct bch_fs *c)
const char *err = "cannot allocate memory";
struct bch_sb_field_clean *clean = NULL;
struct jset *last_journal_entry = NULL;
- u64 blacklist_seq, journal_seq;
+ u64 last_seq, blacklist_seq, journal_seq;
bool write_sb = false;
int ret = 0;
@@ -1086,14 +1094,11 @@ int bch2_fs_recovery(struct bch_fs *c)
}
if (!c->opts.nochanges) {
- if (c->sb.version < bcachefs_metadata_version_backpointers) {
+ if (c->sb.version < bcachefs_metadata_version_lru_v2) {
bch_info(c, "version prior to backpointers, upgrade and fsck required");
c->opts.version_upgrade = true;
c->opts.fsck = true;
c->opts.fix_errors = FSCK_OPT_YES;
- } else if (c->sb.version < bcachefs_metadata_version_inode_v3) {
- bch_info(c, "version prior to inode_v3, upgrade required");
- c->opts.version_upgrade = true;
}
}
@@ -1114,7 +1119,7 @@ int bch2_fs_recovery(struct bch_fs *c)
struct journal_replay **i;
bch_verbose(c, "starting journal read");
- ret = bch2_journal_read(c, &blacklist_seq, &journal_seq);
+ ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
if (ret)
goto err;
@@ -1142,7 +1147,15 @@ int bch2_fs_recovery(struct bch_fs *c)
if (!last_journal_entry) {
fsck_err_on(!c->sb.clean, c, "no journal entries found");
- goto use_clean;
+ if (clean)
+ goto use_clean;
+
+ genradix_for_each_reverse(&c->journal_entries, iter, i)
+ if (*i) {
+ last_journal_entry = &(*i)->j;
+ (*i)->ignore = false;
+ break;
+ }
}
ret = journal_keys_sort(c);
@@ -1188,7 +1201,9 @@ use_clean:
journal_seq += 8;
if (blacklist_seq != journal_seq) {
- ret = bch2_journal_seq_blacklist_add(c,
+ ret = bch2_fs_log_msg(c, "blacklisting entries %llu-%llu",
+ blacklist_seq, journal_seq) ?:
+ bch2_journal_seq_blacklist_add(c,
blacklist_seq, journal_seq);
if (ret) {
bch_err(c, "error creating new journal seq blacklist entry");
@@ -1196,7 +1211,9 @@ use_clean:
}
}
- ret = bch2_fs_journal_start(&c->journal, journal_seq);
+ ret = bch2_fs_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
+ journal_seq, last_seq, blacklist_seq - 1) ?:
+ bch2_fs_journal_start(&c->journal, journal_seq);
if (ret)
goto err;
@@ -1245,13 +1262,6 @@ use_clean:
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
- bch_info(c, "checking need_discard and freespace btrees");
- err = "error checking need_discard and freespace btrees";
- ret = bch2_check_alloc_info(c);
- if (ret)
- goto err;
- bch_verbose(c, "done checking need_discard and freespace btrees");
-
if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
err = "error creating root snapshot node";
ret = bch2_fs_initialize_subvolumes(c);
@@ -1270,12 +1280,21 @@ use_clean:
bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr);
err = "journal replay failed";
- ret = bch2_journal_replay(c);
+ ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
if (ret)
goto err;
if (c->opts.verbose || !c->sb.clean)
bch_info(c, "journal replay done");
+ bch_info(c, "checking need_discard and freespace btrees");
+ err = "error checking need_discard and freespace btrees";
+ ret = bch2_check_alloc_info(c);
+ if (ret)
+ goto err;
+ bch_verbose(c, "done checking need_discard and freespace btrees");
+
+ set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
+
bch_info(c, "checking lrus");
err = "error checking lrus";
ret = bch2_check_lrus(c);
@@ -1315,6 +1334,7 @@ use_clean:
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
} else {
set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+ set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
@@ -1341,7 +1361,7 @@ use_clean:
bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
err = "journal replay failed";
- ret = bch2_journal_replay(c);
+ ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
if (ret)
goto err;
if (c->opts.verbose || !c->sb.clean)
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index ec672fed..e89a9a1a 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -283,7 +283,7 @@ s64 bch2_remap_range(struct bch_fs *c,
int ret = 0, ret2 = 0;
if (!percpu_ref_tryget_live(&c->writes))
- return -EROFS;
+ return -BCH_ERR_erofs_no_writes;
bch2_check_set_feature(c, BCH_FEATURE_reflink);
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 0aa243f5..738b68b5 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -804,6 +804,11 @@ int bch2_write_super(struct bch_fs *c)
closure_init_stack(cl);
memset(&sb_written, 0, sizeof(sb_written));
+ if (c->opts.version_upgrade) {
+ c->disk_sb.sb->magic = BCHFS_MAGIC;
+ c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
+ }
+
le64_add_cpu(&c->disk_sb.sb->seq, 1);
if (test_bit(BCH_FS_ERROR, &c->flags))
@@ -858,7 +863,7 @@ int bch2_write_super(struct bch_fs *c)
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
- ret = -EROFS;
+ ret = -BCH_ERR_erofs_sb_err;
goto out;
}
@@ -868,7 +873,7 @@ int bch2_write_super(struct bch_fs *c)
le64_to_cpu(ca->sb_read_scratch->seq),
ca->disk_sb.seq);
percpu_ref_put(&ca->io_ref);
- ret = -EROFS;
+ ret = -BCH_ERR_erofs_sb_err;
goto out;
}
}
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 7cac0567..95c16f70 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -37,6 +37,7 @@
#include "move.h"
#include "migrate.h"
#include "movinggc.h"
+#include "nocow_locking.h"
#include "quota.h"
#include "rebalance.h"
#include "recovery.h"
@@ -803,6 +804,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_subvolumes_init(c) ?:
bch2_fs_io_init(c) ?:
+ bch2_fs_nocow_locking_init(c) ?:
bch2_fs_encryption_init(c) ?:
bch2_fs_compress_init(c) ?:
bch2_fs_ec_init(c) ?:
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 647d018b..6e49cf98 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -27,6 +27,7 @@
#include "journal.h"
#include "keylist.h"
#include "move.h"
+#include "nocow_locking.h"
#include "opts.h"
#include "rebalance.h"
#include "replicas.h"
@@ -194,6 +195,7 @@ read_attribute(btree_cache);
read_attribute(btree_key_cache);
read_attribute(stripes_heap);
read_attribute(open_buckets);
+read_attribute(nocow_lock_table);
read_attribute(internal_uuid);
@@ -445,6 +447,9 @@ SHOW(bch2_fs)
if (attr == &sysfs_data_jobs)
data_progress_to_text(out, c);
+ if (attr == &sysfs_nocow_lock_table)
+ bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
return 0;
}
@@ -627,6 +632,7 @@ struct attribute *bch2_fs_internal_files[] = {
&sysfs_new_stripes,
&sysfs_stripes_heap,
&sysfs_open_buckets,
+ &sysfs_nocow_lock_table,
&sysfs_io_timers_read,
&sysfs_io_timers_write,
diff --git a/linux/shrinker.c b/linux/shrinker.c
index 23e288d8..0b5715b3 100644
--- a/linux/shrinker.c
+++ b/linux/shrinker.c
@@ -1,6 +1,7 @@
#include <stdio.h>
+#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/mutex.h>
@@ -126,3 +127,31 @@ void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
}
mutex_unlock(&shrinker_lock);
}
+
+static int shrinker_thread(void *arg) /* thread body: periodically invokes run_shrinkers(); arg is never read */
+{
+ while (!kthread_should_stop()) { /* keep going until a stop has been requested */
+ sleep(1); /* pause between passes */
+ run_shrinkers(GFP_KERNEL, false); /* second argument is allocation_failed: false for this periodic pass */
+ }
+
+ return 0; /* always reports success */
+}
+
+struct task_struct *shrinker_task; /* handle of the "shrinkers" thread: set by shrinker_thread_init(), reset to NULL by shrinker_thread_exit() */
+
+__attribute__((constructor(103))) /* GCC constructor attribute with priority 103: invoked automatically, not called explicitly */
+static void shrinker_thread_init(void) /* starts the background thread that runs shrinker_thread() */
+{
+ shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers"); /* NULL is the thread argument; "shrinkers" is the thread name */
+ BUG_ON(IS_ERR(shrinker_task)); /* an error-pointer result from kthread_run() trips BUG_ON */
+}
+
+__attribute__((destructor(103))) /* GCC destructor attribute, same priority (103) as shrinker_thread_init() */
+static void shrinker_thread_exit(void) /* stops the thread recorded in shrinker_task */
+{
+ int ret = kthread_stop(shrinker_task); /* NOTE(review): if this waits for the thread to exit, it may block for up to the sleep(1) in shrinker_thread -- confirm against the kthread shim */
+ BUG_ON(ret); /* any nonzero result trips BUG_ON; presumably ret is the thread function's return value (shrinker_thread returns 0) -- confirm */
+
+ shrinker_task = NULL; /* clear the handle after the stop call */
+}