53 files changed, 852 insertions, 525 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision index 0ba18448..615d94b8 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -de3b30303e8a52dcbf738065efb4cf183fdbf1c1 +0939e1c73231c779c961e1143e1ba489ef2b168c diff --git a/cmd_device.c b/cmd_device.c index e3c5d513..c59d3709 100644 --- a/cmd_device.c +++ b/cmd_device.c @@ -14,6 +14,7 @@ #include "libbcachefs/bcachefs.h" #include "libbcachefs/bcachefs_ioctl.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/journal.h" #include "libbcachefs/super-io.h" #include "cmds.h" @@ -410,7 +411,7 @@ int cmd_device_set_state(int argc, char *argv[]) int ret = bch2_read_super(dev_str, &opts, &sb); if (ret) - die("error opening %s: %s", dev_str, strerror(-ret)); + die("error opening %s: %s", dev_str, bch2_err_str(ret)); struct bch_member *m = bch2_sb_get_members(sb.sb)->members + sb.sb->dev_idx; @@ -527,7 +528,7 @@ int cmd_device_resize(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); if (IS_ERR(c)) - die("error opening %s: %s", dev, strerror(-PTR_ERR(c))); + die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); struct bch_dev *ca, *resize = NULL; unsigned i; @@ -547,7 +548,7 @@ int cmd_device_resize(int argc, char *argv[]) printf("resizing %s to %llu buckets\n", dev, nbuckets); int ret = bch2_dev_resize(c, resize, nbuckets); if (ret) - fprintf(stderr, "resize error: %s\n", strerror(-ret)); + fprintf(stderr, "resize error: %s\n", bch2_err_str(ret)); percpu_ref_put(&resize->io_ref); bch2_fs_stop(c); @@ -630,7 +631,7 @@ int cmd_device_resize_journal(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(&dev, 1, bch2_opts_empty()); if (IS_ERR(c)) - die("error opening %s: %s", dev, strerror(-PTR_ERR(c))); + die("error opening %s: %s", dev, bch2_err_str(PTR_ERR(c))); struct bch_dev *ca, *resize = NULL; unsigned i; @@ -647,7 +648,7 @@ int cmd_device_resize_journal(int argc, char *argv[]) printf("resizing journal on %s to %llu buckets\n", dev, nbuckets); int ret = bch2_set_nr_journal_buckets(c, resize, nbuckets); if (ret) - fprintf(stderr, "resize error: %s\n", strerror(-ret)); + fprintf(stderr, "resize error: %s\n", bch2_err_str(ret)); percpu_ref_put(&resize->io_ref); bch2_fs_stop(c); @@ -82,7 +82,7 @@ static void dump_one_device(struct bch_fs *c, struct bch_dev *ca, int fd, } if (ret) - die("error %s walking btree nodes", strerror(-ret)); + die("error %s walking btree nodes", bch2_err_str(ret)); b = c->btree_roots[i].b; if (!btree_node_fake(b)) { @@ -147,7 +147,7 @@ int cmd_dump(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, opts); if (IS_ERR(c)) - die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c))); + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); down_read(&c->gc_lock); diff --git a/cmd_format.c b/cmd_format.c index 4debc285..26a1cd9f 100644 --- a/cmd_format.c +++ b/cmd_format.c @@ -24,6 +24,7 @@ #include "libbcachefs.h" #include "crypto.h" #include "libbcachefs/darray.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/opts.h" #include "libbcachefs/super-io.h" #include "libbcachefs/util.h" @@ -218,6 +219,9 @@ int cmd_format(int argc, char *argv[]) break; } + if (opts.version != bcachefs_metadata_version_current) + initialize = false; + if (!devices.nr) die("Please supply a device"); @@ -270,7 +274,7 @@ int cmd_format(int argc, char *argv[]) mount_opts); if (IS_ERR(c)) die("error opening %s: %s", device_paths.data[0], - strerror(-PTR_ERR(c))); + bch2_err_str(PTR_ERR(c))); bch2_fs_stop(c); } @@ -336,7 +340,7 @@ int cmd_show_super(int 
argc, char *argv[]) struct bch_sb_handle sb; int ret = bch2_read_super(dev, &opts, &sb); if (ret) - die("Error opening %s: %s", dev, strerror(-ret)); + die("Error opening %s: %s", dev, bch2_err_str(ret)); struct printbuf buf = PRINTBUF; @@ -89,7 +89,7 @@ int cmd_fsck(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, opts); if (IS_ERR(c)) { - fprintf(stderr, "error opening %s: %s\n", argv[0], strerror(-PTR_ERR(c))); + fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c))); exit(8); } diff --git a/cmd_fusemount.c b/cmd_fusemount.c index 216094f0..4470f838 100644 --- a/cmd_fusemount.c +++ b/cmd_fusemount.c @@ -17,6 +17,7 @@ #include "libbcachefs/btree_iter.h" #include "libbcachefs/buckets.h" #include "libbcachefs/dirent.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/error.h" #include "libbcachefs/fs-common.h" #include "libbcachefs/inode.h" @@ -1229,7 +1230,7 @@ int cmd_fusemount(int argc, char *argv[]) c = bch2_fs_open(ctx.devices, ctx.nr_devices, bch_opts); if (IS_ERR(c)) die("error opening %s: %s", ctx.devices_str, - strerror(-PTR_ERR(c))); + bch2_err_str(PTR_ERR(c))); /* Fuse */ struct fuse_session *se = @@ -55,7 +55,7 @@ int cmd_unlock(int argc, char *argv[]) struct bch_sb_handle sb; int ret = bch2_read_super(dev, &opts, &sb); if (ret) - die("Error opening %s: %s", dev, strerror(-ret)); + die("Error opening %s: %s", dev, bch2_err_str(ret)); if (!bch2_sb_is_encrypted(sb.sb)) die("%s is not encrypted", dev); @@ -90,7 +90,7 @@ int cmd_set_passphrase(int argc, char *argv[]) c = bch2_fs_open(argv + 1, argc - 1, opts); if (IS_ERR(c)) - die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c))); + die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb); if (!crypt) @@ -127,7 +127,7 @@ int cmd_remove_passphrase(int argc, char *argv[]) opt_set(opts, nostart, true); c = bch2_fs_open(argv + 1, argc - 1, opts); if (IS_ERR(c)) - die("Error opening %s: %s", argv[1], strerror(-PTR_ERR(c))); + die("Error opening %s: %s", argv[1], bch2_err_str(PTR_ERR(c))); struct bch_sb_field_crypt *crypt = bch2_sb_get_crypt(c->disk_sb.sb); if (!crypt) diff --git a/cmd_kill_btree_node.c b/cmd_kill_btree_node.c index a0e0fc9b..a8915a1f 100644 --- a/cmd_kill_btree_node.c +++ b/cmd_kill_btree_node.c @@ -9,6 +9,7 @@ #include "libbcachefs/bcachefs.h" #include "libbcachefs/btree_iter.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/error.h" #include "libbcachefs/super.h" @@ -60,7 +61,7 @@ int cmd_kill_btree_node(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, opts); if (IS_ERR(c)) - die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c))); + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); struct btree_trans trans; struct btree_iter iter; @@ -70,7 +71,7 @@ int cmd_kill_btree_node(int argc, char *argv[]) ret = posix_memalign(&zeroes, c->opts.block_size, c->opts.block_size); if (ret) - die("error %s from posix_memalign", strerror(ret)); + die("error %s from posix_memalign", bch2_err_str(ret)); bch2_trans_init(&trans, c, 0, 0); @@ -67,7 +67,7 @@ static void list_btree_formats(struct bch_fs *c, enum btree_id btree_id, unsigne bch2_trans_iter_exit(&trans, &iter); if (ret) - die("error %s walking btree nodes", strerror(-ret)); + die("error %s walking btree nodes", bch2_err_str(ret)); bch2_trans_exit(&trans); printbuf_exit(&buf); @@ -96,7 +96,7 @@ static void list_nodes(struct bch_fs *c, enum btree_id btree_id, unsigned level, bch2_trans_iter_exit(&trans, 
&iter); if (ret) - die("error %s walking btree nodes", strerror(-ret)); + die("error %s walking btree nodes", bch2_err_str(ret)); bch2_trans_exit(&trans); printbuf_exit(&buf); @@ -232,7 +232,7 @@ static void list_nodes_ondisk(struct bch_fs *c, enum btree_id btree_id, unsigned bch2_trans_iter_exit(&trans, &iter); if (ret) - die("error %s walking btree nodes", strerror(-ret)); + die("error %s walking btree nodes", bch2_err_str(ret)); bch2_trans_exit(&trans); printbuf_exit(&buf); @@ -270,7 +270,7 @@ static void list_nodes_keys(struct bch_fs *c, enum btree_id btree_id, unsigned l bch2_trans_iter_exit(&trans, &iter); if (ret) - die("error %s walking btree nodes", strerror(-ret)); + die("error %s walking btree nodes", bch2_err_str(ret)); bch2_trans_exit(&trans); printbuf_exit(&buf); @@ -376,7 +376,7 @@ int cmd_list(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, opts); if (IS_ERR(c)) - die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c))); + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); for (btree_id = btree_id_start; diff --git a/cmd_list_journal.c b/cmd_list_journal.c index 0836ebfc..e89f7de9 100644 --- a/cmd_list_journal.c +++ b/cmd_list_journal.c @@ -9,6 +9,7 @@ #include "libbcachefs/bcachefs.h" #include "libbcachefs/btree_iter.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/error.h" #include "libbcachefs/journal_io.h" #include "libbcachefs/journal_seq_blacklist.h" @@ -75,7 +76,7 @@ int cmd_list_journal(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, opts); if (IS_ERR(c)) - die("error opening %s: %s", argv[0], strerror(-PTR_ERR(c))); + die("error opening %s: %s", argv[0], bch2_err_str(PTR_ERR(c))); struct journal_replay *p, **_p; struct genradix_iter iter; diff --git a/cmd_migrate.c b/cmd_migrate.c index 707f13e1..5a35c5a1 100644 --- a/cmd_migrate.c +++ b/cmd_migrate.c @@ -30,6 +30,7 @@ #include "libbcachefs/btree_update.h" #include "libbcachefs/buckets.h" #include "libbcachefs/dirent.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/fs-common.h" #include "libbcachefs/inode.h" #include "libbcachefs/io.h" @@ -127,7 +128,7 @@ static void update_inode(struct bch_fs *c, ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed.inode.k_i, NULL, NULL, 0); if (ret) - die("error updating inode: %s", strerror(-ret)); + die("error updating inode: %s", bch2_err_str(ret)); } static void create_link(struct bch_fs *c, @@ -143,7 +144,7 @@ static void create_link(struct bch_fs *c, (subvol_inum) { 1, parent->bi_inum }, &parent_u, (subvol_inum) { 1, inum }, &inode, &qstr)); if (ret) - die("error creating hardlink: %s", strerror(-ret)); + die("error creating hardlink: %s", bch2_err_str(ret)); } static struct bch_inode_unpacked create_file(struct bch_fs *c, @@ -164,7 +165,7 @@ static struct bch_inode_unpacked create_file(struct bch_fs *c, uid, gid, mode, rdev, NULL, NULL, (subvol_inum) {}, 0)); if (ret) - die("error creating %s: %s", name, strerror(-ret)); + die("error creating %s: %s", name, bch2_err_str(ret)); return new_inode; } @@ -235,7 +236,7 @@ static void copy_xattrs(struct bch_fs *c, struct bch_inode_unpacked *dst, &hash_info, attr, val, val_size, h->flags, 0)); if (ret < 0) - die("error creating xattr: %s", strerror(-ret)); + die("error creating xattr: %s", bch2_err_str(ret)); } } @@ -270,7 +271,7 @@ static void write_data(struct bch_fs *c, int ret = bch2_disk_reservation_get(c, &op.res, len >> 9, c->opts.data_replicas, 0); if (ret) - die("error reserving space in new filesystem: %s", strerror(-ret)); + die("error reserving 
space in new filesystem: %s", bch2_err_str(ret)); closure_call(&op.cl, bch2_write, NULL, &cl); @@ -335,12 +336,12 @@ static void link_data(struct bch_fs *c, struct bch_inode_unpacked *dst, BCH_DISK_RESERVATION_NOFAIL); if (ret) die("error reserving space in new filesystem: %s", - strerror(-ret)); + bch2_err_str(ret)); ret = bch2_btree_insert(c, BTREE_ID_extents, &e->k_i, &res, NULL, 0); if (ret) - die("btree insert error %s", strerror(-ret)); + die("btree insert error %s", bch2_err_str(ret)); bch2_disk_reservation_put(c, &res); @@ -581,7 +582,7 @@ static void copy_fs(struct bch_fs *c, int src_fd, const char *src_path, int ret = bch2_inode_find_by_inum(c, (subvol_inum) { 1, BCACHEFS_ROOT_INO }, &root_inode); if (ret) - die("error looking up root directory: %s", strerror(-ret)); + die("error looking up root directory: %s", bch2_err_str(ret)); if (fchdir(src_fd)) die("chdir error: %m"); @@ -706,13 +707,13 @@ static int migrate_fs(const char *fs_path, c = bch2_fs_open(path, 1, opts); if (IS_ERR(c)) - die("Error opening new filesystem: %s", strerror(-PTR_ERR(c))); + die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); mark_unreserved_space(c, extents); int ret = bch2_fs_start(c); if (ret) - die("Error starting new filesystem: %s", strerror(-ret)); + die("Error starting new filesystem: %s", bch2_err_str(ret)); copy_fs(c, fs_fd, fs_path, bcachefs_inum, &extents); @@ -724,7 +725,7 @@ static int migrate_fs(const char *fs_path, c = bch2_fs_open(path, 1, opts); if (IS_ERR(c)) - die("Error opening new filesystem: %s", strerror(-PTR_ERR(c))); + die("Error opening new filesystem: %s", bch2_err_str(PTR_ERR(c))); bch2_fs_stop(c); printf("fsck complete\n"); diff --git a/cmd_option.c b/cmd_option.c index 86768e5d..6ce34016 100644 --- a/cmd_option.c +++ b/cmd_option.c @@ -20,6 +20,7 @@ #include "cmds.h" #include "libbcachefs.h" +#include "libbcachefs/errcode.h" #include "libbcachefs/opts.h" #include "libbcachefs/super-io.h" @@ -64,7 +65,7 @@ int cmd_set_option(int argc, char *argv[]) struct bch_fs *c = bch2_fs_open(argv, argc, open_opts); if (IS_ERR(c)) { - fprintf(stderr, "error opening %s: %s\n", argv[0], strerror(-PTR_ERR(c))); + fprintf(stderr, "error opening %s: %s\n", argv[0], bch2_err_str(PTR_ERR(c))); exit(EXIT_FAILURE); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 17fe235e..cf48570c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -20,12 +20,10 @@ static inline void *kmalloc(size_t size, gfp_t flags) { - unsigned i = 0; + unsigned i; void *p; - do { - run_shrinkers(flags, i != 0); - + for (i = 0; i < 10; i++) { if (size) { size_t alignment = min(rounddown_pow_of_two(size), (size_t)PAGE_SIZE); alignment = max(sizeof(void *), alignment); @@ -34,9 +32,15 @@ static inline void *kmalloc(size_t size, gfp_t flags) } else { p = malloc(0); } - if (p && (flags & __GFP_ZERO)) - memset(p, 0, size); - } while (!p && i++ < 10); + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + run_shrinkers(flags, true); + } return p; } @@ -93,16 +97,20 @@ static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t static inline struct page *alloc_pages(gfp_t flags, unsigned int order) { size_t size = PAGE_SIZE << order; - unsigned i = 0; + unsigned i; void *p; - do { - run_shrinkers(flags, i != 0); - + for (i = 0; i < 10; i++) { p = aligned_alloc(PAGE_SIZE, size); - if (p && (flags & __GFP_ZERO)) - memset(p, 0, size); - } while (!p && i++ < 10); + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + 
run_shrinkers(flags, true); + } return p; } @@ -193,20 +201,24 @@ static inline struct kmem_cache *kmem_cache_create(size_t obj_size) #define vfree(p) free(p) -static inline void *__vmalloc(unsigned long size, gfp_t gfp_mask) +static inline void *__vmalloc(unsigned long size, gfp_t flags) { - unsigned i = 0; + unsigned i; void *p; size = round_up(size, PAGE_SIZE); - do { - run_shrinkers(gfp_mask, i != 0); - + for (i = 0; i < 10; i++) { p = aligned_alloc(PAGE_SIZE, size); - if (p && gfp_mask & __GFP_ZERO) - memset(p, 0, size); - } while (!p && i++ < 10); + + if (p) { + if (flags & __GFP_ZERO) + memset(p, 0, size); + break; + } + + run_shrinkers(flags, true); + } return p; } diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h index 47ba750d..f699146a 100644 --- a/include/trace/events/bcachefs.h +++ b/include/trace/events/bcachefs.h @@ -564,6 +564,7 @@ TRACE_EVENT(bucket_alloc_fail, __field(u64, need_journal_commit ) __field(u64, nouse ) __field(bool, nonblocking ) + __field(u64, nocow ) __array(char, err, 32 ) ), @@ -579,10 +580,11 @@ TRACE_EVENT(bucket_alloc_fail, __entry->need_journal_commit = s->skipped_need_journal_commit; __entry->nouse = s->skipped_nouse; __entry->nonblocking = nonblocking; + __entry->nocow = s->skipped_nocow; strscpy(__entry->err, err, sizeof(__entry->err)); ), - TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u err %s", + TP_printk("%d,%d reserve %s free %llu avail %llu copygc_wait %llu/%lli seen %llu open %llu need_journal_commit %llu nouse %llu nonblocking %u nocow %llu err %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->reserve, __entry->free, @@ -594,6 +596,7 @@ TRACE_EVENT(bucket_alloc_fail, __entry->need_journal_commit, __entry->nouse, __entry->nonblocking, + __entry->nocow, __entry->err) ); @@ -702,6 +705,37 @@ TRACE_EVENT(move_data, __entry->sectors_moved, __entry->keys_moved) ); +TRACE_EVENT(evacuate_bucket, + TP_PROTO(struct bch_fs *c, struct bpos *bucket, + unsigned sectors, unsigned bucket_size, + int ret), + TP_ARGS(c, bucket, sectors, bucket_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __field(u64, member ) + __field(u64, bucket ) + __field(u32, sectors ) + __field(u32, bucket_size ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->dev = c->dev; + __entry->member = bucket->inode; + __entry->bucket = bucket->offset; + __entry->sectors = sectors; + __entry->bucket_size = bucket_size; + __entry->ret = ret; + ), + + TP_printk("%d,%d %llu:%llu sectors %u/%u ret %i", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->member, __entry->bucket, + __entry->sectors, __entry->bucket_size, + __entry->ret) +); + TRACE_EVENT(copygc, TP_PROTO(struct bch_fs *c, u64 sectors_moved, u64 sectors_not_moved, diff --git a/libbcachefs.h b/libbcachefs.h index 17e8eef3..4bb51bd8 100644 --- a/libbcachefs.h +++ b/libbcachefs.h @@ -41,8 +41,12 @@ struct format_opts { static inline struct format_opts format_opts_default() { + unsigned version = !access( "/sys/module/bcachefs/parameters/version", R_OK) + ? 
read_file_u64(AT_FDCWD, "/sys/module/bcachefs/parameters/version") + : bcachefs_metadata_version_current; + return (struct format_opts) { - .version = bcachefs_metadata_version_current, + .version = version, .superblock_size = SUPERBLOCK_SIZE_DEFAULT, }; } diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 39d8d317..a78232ed 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -386,14 +386,16 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); - const struct bch_backpointer *bps; unsigned i; prt_newline(out); printbuf_indent_add(out, 2); prt_printf(out, "gen %u oldest_gen %u data_type %s", - a->gen, a->oldest_gen, bch2_data_types[a->data_type]); + a->gen, a->oldest_gen, + a->data_type < BCH_DATA_NR + ? bch2_data_types[a->data_type] + : "(invalid data type)"); prt_newline(out); prt_printf(out, "journal_seq %llu", a->journal_seq); prt_newline(out); @@ -413,33 +415,41 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_newline(out); prt_printf(out, "io_time[WRITE] %llu", a->io_time[WRITE]); prt_newline(out); - prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a)); - printbuf_indent_add(out, 2); - bps = alloc_v4_backpointers_c(a); - for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a); i++) { + if (k.k->type == KEY_TYPE_alloc_v4) { + struct bkey_s_c_alloc_v4 a_raw = bkey_s_c_to_alloc_v4(k); + const struct bch_backpointer *bps = alloc_v4_backpointers_c(a_raw.v); + + prt_printf(out, "bp_start %llu", BCH_ALLOC_V4_BACKPOINTERS_START(a_raw.v)); prt_newline(out); - bch2_backpointer_to_text(out, &bps[i]); + + prt_printf(out, "backpointers: %llu", BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v)); + printbuf_indent_add(out, 2); + + for (i = 0; i < BCH_ALLOC_V4_NR_BACKPOINTERS(a_raw.v); i++) { + prt_newline(out); + bch2_backpointer_to_text(out, &bps[i]); + } + + printbuf_indent_sub(out, 2); } - printbuf_indent_sub(out, 4); + printbuf_indent_sub(out, 2); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) { if (k.k->type == KEY_TYPE_alloc_v4) { - int d; + void *src, *dst; *out = *bkey_s_c_to_alloc_v4(k).v; - d = (int) BCH_ALLOC_V4_U64s - - (int) (BCH_ALLOC_V4_BACKPOINTERS_START(out) ?: BCH_ALLOC_V4_U64s_V0); - if (unlikely(d > 0)) { - memset((u64 *) out + BCH_ALLOC_V4_BACKPOINTERS_START(out), - 0, - d * sizeof(u64)); - SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); - } + src = alloc_v4_backpointers(out); + SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); + dst = alloc_v4_backpointers(out); + + if (src < dst) + memset(src, 0, dst - src); } else { struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); @@ -465,20 +475,20 @@ static noinline struct bkey_i_alloc_v4 * __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) { struct bkey_i_alloc_v4 *ret; - unsigned bytes = k.k->type == KEY_TYPE_alloc_v4 - ? bkey_bytes(k.k) - : sizeof(struct bkey_i_alloc_v4); - - /* - * Reserve space for one more backpointer here: - * Not sketchy at doing it this way, nope... 
- */ - ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer)); - if (IS_ERR(ret)) - return ret; - if (k.k->type == KEY_TYPE_alloc_v4) { - struct bch_backpointer *src, *dst; + struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + unsigned bytes = sizeof(struct bkey_i_alloc_v4) + + BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) * + sizeof(struct bch_backpointer); + void *src, *dst; + + /* + * Reserve space for one more backpointer here: + * Not sketchy at doing it this way, nope... + */ + ret = bch2_trans_kmalloc(trans, bytes + sizeof(struct bch_backpointer)); + if (IS_ERR(ret)) + return ret; bkey_reassemble(&ret->k_i, k); @@ -488,9 +498,15 @@ __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) memmove(dst, src, BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v) * sizeof(struct bch_backpointer)); - memset(src, 0, dst - src); + if (src < dst) + memset(src, 0, dst - src); set_alloc_v4_u64s(ret); } else { + ret = bch2_trans_kmalloc(trans, sizeof(struct bkey_i_alloc_v4) + + sizeof(struct bch_backpointer)); + if (IS_ERR(ret)) + return ret; + bkey_alloc_v4_init(&ret->k_i); ret->k.p = k.k->p; bch2_alloc_to_v4(k, &ret->v); @@ -508,10 +524,8 @@ static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_ */ struct bkey_i_alloc_v4 *ret = bch2_trans_kmalloc_nomemzero(trans, bkey_bytes(k.k) + sizeof(struct bch_backpointer)); - if (!IS_ERR(ret)) { + if (!IS_ERR(ret)) bkey_reassemble(&ret->k_i, k); - memset((void *) ret + bkey_bytes(k.k), 0, sizeof(struct bch_backpointer)); - } return ret; } @@ -789,6 +803,7 @@ static int bch2_bucket_do_index(struct btree_trans *trans, goto err; if (ca->mi.freespace_initialized && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags) && bch2_trans_inconsistent_on(old.k->type != old_type, trans, "incorrect key when %s %s btree (got %s should be %s)\n" " for %s", @@ -900,13 +915,11 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, new_lru = alloc_lru_idx(*new_a); if (old_lru != new_lru) { - ret = bch2_lru_change(trans, new->k.p.inode, new->k.p.offset, - old_lru, &new_lru, old); + ret = bch2_lru_change(trans, new->k.p.inode, + bucket_to_u64(new->k.p), + old_lru, new_lru); if (ret) return ret; - - if (new_a->data_type == BCH_DATA_cached) - new_a->io_time[READ] = new_lru; } if (old_a->gen != new_a->gen) { @@ -1244,7 +1257,15 @@ static int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, } if (need_update) { - ret = bch2_trans_update(trans, bucket_gens_iter, &g.k_i, 0); + struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(g)); + + ret = PTR_ERR_OR_ZERO(k); + if (ret) + goto err; + + memcpy(k, &g, sizeof(g)); + + ret = bch2_trans_update(trans, bucket_gens_iter, k, 0); if (ret) goto err; } @@ -1370,7 +1391,7 @@ static int bch2_check_bucket_gens_key(struct btree_trans *trans, k = bch2_trans_kmalloc(trans, sizeof(g)); ret = PTR_ERR_OR_ZERO(k); if (ret) - return ret; + goto out; memcpy(k, &g, sizeof(g)); ret = bch2_trans_update(trans, iter, k, 0); @@ -1422,7 +1443,7 @@ int bch2_check_alloc_info(struct bch_fs *c) &freespace_iter, &bucket_gens_iter); if (ret) - break; + goto bkey_err; } else { next = k.k->p; @@ -1488,7 +1509,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, const struct bch_alloc_v4 *a; struct bkey_s_c alloc_k, k; struct printbuf buf = PRINTBUF; - struct printbuf buf2 = PRINTBUF; int ret; alloc_k = bch2_btree_iter_peek(alloc_iter); @@ -1505,8 +1525,9 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, return 0; bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, - POS(alloc_k.k->p.inode, 
a->io_time[READ]), 0); - + lru_pos(alloc_k.k->p.inode, + bucket_to_u64(alloc_k.k->p), + a->io_time[READ]), 0); k = bch2_btree_iter_peek_slot(&lru_iter); ret = bkey_err(k); if (ret) @@ -1517,21 +1538,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, " %s", (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) || - fsck_err_on(k.k->type != KEY_TYPE_lru || - le64_to_cpu(bkey_s_c_to_lru(k).v->idx) != alloc_k.k->p.offset, c, - "incorrect/missing lru entry\n" - " %s\n" + fsck_err_on(k.k->type != KEY_TYPE_set, c, + "missing lru entry\n" " %s", (printbuf_reset(&buf), - bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), - (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) { + bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { u64 read_time = a->io_time[READ] ?: atomic64_read(&c->io_clock[READ].now); ret = bch2_lru_set(trans, alloc_k.k->p.inode, - alloc_k.k->p.offset, - &read_time); + bucket_to_u64(alloc_k.k->p), + read_time); if (ret) goto err; @@ -1552,7 +1570,6 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &lru_iter); - printbuf_exit(&buf2); printbuf_exit(&buf); return ret; } @@ -1630,21 +1647,28 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto write; } - if (bch2_trans_inconsistent_on(a->v.journal_seq > c->journal.flushed_seq_ondisk, trans, - "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" - "%s", - a->v.journal_seq, - c->journal.flushed_seq_ondisk, - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "clearing need_discard but journal_seq %llu > flushed_seq %llu\n" + "%s", + a->v.journal_seq, + c->journal.flushed_seq_ondisk, + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } goto out; } - if (bch2_trans_inconsistent_on(a->v.data_type != BCH_DATA_need_discard, trans, - "bucket incorrectly set in need_discard btree\n" - "%s", - (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { - ret = -EIO; + if (a->v.data_type != BCH_DATA_need_discard) { + if (test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + bch2_trans_inconsistent(trans, + "bucket incorrectly set in need_discard btree\n" + "%s", + (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); + ret = -EIO; + } + goto out; } @@ -1732,51 +1756,34 @@ void bch2_do_discards(struct bch_fs *c) } static int invalidate_one_bucket(struct btree_trans *trans, - struct btree_iter *lru_iter, struct bkey_s_c k, - unsigned dev_idx, s64 *nr_to_invalidate) + struct btree_iter *lru_iter, + struct bpos bucket, + s64 *nr_to_invalidate) { struct bch_fs *c = trans->c; struct btree_iter alloc_iter = { NULL }; struct bkey_i_alloc_v4 *a; - struct bpos bucket; struct printbuf buf = PRINTBUF; unsigned cached_sectors; int ret = 0; - if (*nr_to_invalidate <= 0 || k.k->p.inode != dev_idx) + if (*nr_to_invalidate <= 0) return 1; - if (k.k->type != KEY_TYPE_lru) { - prt_printf(&buf, "non lru key in lru btree:\n "); - bch2_bkey_val_to_text(&buf, c, k); - - if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { - bch_err(c, "%s", buf.buf); - } else { - bch2_trans_inconsistent(trans, "%s", buf.buf); - ret = -EINVAL; - } - - goto out; - } - - bucket = POS(dev_idx, le64_to_cpu(bkey_s_c_to_lru(k).v->idx)); - a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket); ret = PTR_ERR_OR_ZERO(a); if (ret) goto out; - if (k.k->p.offset != alloc_lru_idx(a->v)) { + if 
(lru_pos_time(lru_iter->pos) != alloc_lru_idx(a->v)) { prt_printf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); + bch2_bpos_to_text(&buf, lru_iter->pos); prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, k); + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); - if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { - bch_err(c, "%s", buf.buf); - } else { - bch2_trans_inconsistent(trans, "%s", buf.buf); + bch_err(c, "%s", buf.buf); + if (test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { + bch2_inconsistent_error(c); ret = -EINVAL; } @@ -1827,9 +1834,13 @@ static void bch2_do_invalidates_work(struct work_struct *work) s64 nr_to_invalidate = should_invalidate_buckets(ca, bch2_dev_usage_read(ca)); - ret = for_each_btree_key2(&trans, iter, BTREE_ID_lru, - POS(ca->dev_idx, 0), BTREE_ITER_INTENT, k, - invalidate_one_bucket(&trans, &iter, k, ca->dev_idx, &nr_to_invalidate)); + ret = for_each_btree_key2_upto(&trans, iter, BTREE_ID_lru, + lru_pos(ca->dev_idx, 0, 0), + lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), + BTREE_ITER_INTENT, k, + invalidate_one_bucket(&trans, &iter, + u64_to_bucket(k.k->p.offset), + &nr_to_invalidate)); if (ret < 0) { percpu_ref_put(&ca->ref); diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index be48b7d8..a0c3c47b 100644 --- a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -23,6 +23,16 @@ static inline bool bch2_dev_bucket_exists(struct bch_fs *c, struct bpos pos) pos.offset < ca->mi.nbuckets; } +static inline u64 bucket_to_u64(struct bpos bucket) +{ + return (bucket.inode << 48) | bucket.offset; +} + +static inline struct bpos u64_to_bucket(u64 bucket) +{ + return POS(bucket >> 48, bucket & ~(~0ULL << 48)); +} + static inline u8 alloc_gc_gen(struct bch_alloc_v4 a) { return a.gen - a.oldest_gen; @@ -112,8 +122,6 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -#define ALLOC_SCAN_BATCH(ca) max_t(size_t, 1, (ca)->mi.nbuckets >> 9) - int bch2_alloc_v1_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_alloc_v2_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); int bch2_alloc_v3_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); @@ -192,7 +200,9 @@ void bch2_do_invalidates(struct bch_fs *); static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a) { - return (void *) ((u64 *) &a->v + BCH_ALLOC_V4_BACKPOINTERS_START(a)); + return (void *) ((u64 *) &a->v + + (BCH_ALLOC_V4_BACKPOINTERS_START(a) ?: + BCH_ALLOC_V4_U64s_V0)); } static inline const struct bch_backpointer *alloc_v4_backpointers_c(const struct bch_alloc_v4 *a) diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 46f215c8..f1cfb90b 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -28,6 +28,7 @@ #include "io.h" #include "journal.h" #include "movinggc.h" +#include "nocow_locking.h" #include <linux/math64.h> #include <linux/rculist.h> @@ -312,28 +313,34 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc a = bch2_alloc_to_v4(k, &a_convert); - if (genbits != (alloc_freespace_genbits(*a) >> 56)) { - prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" - " freespace key ", - genbits, alloc_freespace_genbits(*a) >> 56); + if 
(a->data_type != BCH_DATA_free) { + if (!test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + ob = NULL; + goto err; + } + + prt_printf(&buf, "non free bucket in freespace btree\n" + " freespace key "); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; - } - if (a->data_type != BCH_DATA_free) { - prt_printf(&buf, "non free bucket in freespace btree\n" - " freespace key "); + if (genbits != (alloc_freespace_genbits(*a) >> 56) && + test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + prt_printf(&buf, "bucket in freespace btree with wrong genbits (got %u should be %llu)\n" + " freespace key ", + genbits, alloc_freespace_genbits(*a) >> 56); bch2_bkey_val_to_text(&buf, c, freespace_k); prt_printf(&buf, "\n "); bch2_bkey_val_to_text(&buf, c, k); bch2_trans_inconsistent(trans, "%s", buf.buf); ob = ERR_PTR(-EIO); goto err; + } if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { @@ -506,8 +513,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans, { struct bch_fs *c = trans->c; struct open_bucket *ob = NULL; - bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized); - u64 start = freespace_initialized ? 0 : ca->bucket_alloc_trans_early_cursor; + bool freespace = READ_ONCE(ca->mi.freespace_initialized); + u64 start = freespace ? 0 : ca->bucket_alloc_trans_early_cursor; u64 avail; struct bucket_alloc_state s = { .cur_bucket = start }; bool waiting = false; @@ -546,20 +553,25 @@ again: if (ob) return ob; } - - ob = likely(ca->mi.freespace_initialized) +alloc: + ob = likely(freespace) ? bch2_bucket_alloc_freelist(trans, ca, reserve, &s, cl) : bch2_bucket_alloc_early(trans, ca, reserve, &s, cl); if (s.skipped_need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); - if (!ob && !freespace_initialized && start) { + if (!ob && !freespace && start) { start = s.cur_bucket = 0; - goto again; + goto alloc; } - if (!freespace_initialized) + if (!ob && freespace && !test_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags)) { + freespace = false; + goto alloc; + } + + if (!freespace) ca->bucket_alloc_trans_early_cursor = s.cur_bucket; err: if (!ob) @@ -1224,12 +1236,9 @@ err: if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty) || bch2_err_matches(ret, BCH_ERR_freelist_empty)) return cl - ? -EAGAIN + ? 
-BCH_ERR_bucket_alloc_blocked : -BCH_ERR_ENOSPC_bucket_alloc; - if (bch2_err_matches(ret, BCH_ERR_insufficient_devices)) - return -EROFS; - return ret; } diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 7660a254..405823d1 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -242,6 +242,9 @@ btree: memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp))) { struct printbuf buf = PRINTBUF; + if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) + goto err; + prt_printf(&buf, "backpointer not found when deleting"); prt_newline(&buf); printbuf_indent_add(&buf, 2); @@ -261,12 +264,9 @@ btree: prt_printf(&buf, "for "); bch2_bkey_val_to_text(&buf, c, orig_k); - if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { - bch_err(c, "%s", buf.buf); - } else { - ret = -EIO; - bch2_trans_inconsistent(trans, "%s", buf.buf); - } + bch_err(c, "%s", buf.buf); + bch2_inconsistent_error(c); + ret = -EIO; printbuf_exit(&buf); goto err; } @@ -283,7 +283,6 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans, struct bkey_s_c orig_k) { struct bch_fs *c = trans->c; - struct bch_dev *ca; struct bch_backpointer *bps = alloc_v4_backpointers(&a->v); unsigned i, nr = BCH_ALLOC_V4_NR_BACKPOINTERS(&a->v); struct bkey_i_backpointer *bp_k; @@ -317,11 +316,10 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans, prt_printf(&buf, "for "); bch2_bkey_val_to_text(&buf, c, orig_k); - if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) - bch_err(c, "%s", buf.buf); - else { - bch2_trans_inconsistent(trans, "%s", buf.buf); - printbuf_exit(&buf); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + bch2_inconsistent_error(c); return -EIO; } } @@ -334,18 +332,9 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans, } /* Overflow: use backpointer btree */ - bp_k = bch2_trans_kmalloc(trans, sizeof(*bp_k)); - ret = PTR_ERR_OR_ZERO(bp_k); - if (ret) - return ret; - - ca = bch_dev_bkey_exists(c, a->k.p.inode); - bkey_backpointer_init(&bp_k->k_i); - bp_k->k.p = bucket_pos_to_bp(c, a->k.p, bp.bucket_offset); - bp_k->v = bp; - - bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, bp_k->k.p, + bch2_trans_iter_init(trans, &bp_iter, BTREE_ID_backpointers, + bucket_pos_to_bp(c, a->k.p, bp.bucket_offset), BTREE_ITER_INTENT| BTREE_ITER_SLOTS| BTREE_ITER_WITH_UPDATES); @@ -369,16 +358,22 @@ int bch2_bucket_backpointer_add(struct btree_trans *trans, prt_printf(&buf, "for "); bch2_bkey_val_to_text(&buf, c, orig_k); - if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) - bch_err(c, "%s", buf.buf); - else { - bch2_trans_inconsistent(trans, "%s", buf.buf); - printbuf_exit(&buf); + bch_err(c, "%s", buf.buf); + printbuf_exit(&buf); + if (test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags)) { + bch2_inconsistent_error(c); ret = -EIO; goto err; } } + bp_k = bch2_bkey_alloc(trans, &bp_iter, backpointer); + ret = PTR_ERR_OR_ZERO(bp_k); + if (ret) + goto err; + + bp_k->v = bp; + ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); err: bch2_trans_iter_exit(trans, &bp_iter); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 7f479cdc..febef9ac 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -206,7 +206,7 @@ #include "bcachefs_format.h" #include "errcode.h" #include "fifo.h" -#include "nocow_locking.h" +#include "nocow_locking_types.h" #include "opts.h" #include "util.h" @@ -549,6 +549,7 @@ enum { /* fsck passes: */ BCH_FS_TOPOLOGY_REPAIR_DONE, 
BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ + BCH_FS_CHECK_ALLOC_DONE, BCH_FS_CHECK_LRUS_DONE, BCH_FS_CHECK_BACKPOINTERS_DONE, BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h index 91a6624e..48438e67 100644 --- a/libbcachefs/bcachefs_format.h +++ b/libbcachefs/bcachefs_format.h @@ -1557,7 +1557,8 @@ struct bch_sb_field_journal_seq_blacklist { x(backpointers, 22) \ x(inode_v3, 23) \ x(unwritten_extents, 24) \ - x(bucket_gens, 25) + x(bucket_gens, 25) \ + x(lru_v2, 26) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/libbcachefs/bkey_methods.c b/libbcachefs/bkey_methods.c index bb74e6f7..e13ce07f 100644 --- a/libbcachefs/bkey_methods.c +++ b/libbcachefs/bkey_methods.c @@ -186,7 +186,7 @@ static unsigned bch2_key_types_allowed[] = { (1U << KEY_TYPE_snapshot), [BKEY_TYPE_lru] = (1U << KEY_TYPE_deleted)| - (1U << KEY_TYPE_lru), + (1U << KEY_TYPE_set), [BKEY_TYPE_freespace] = (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_set), diff --git a/libbcachefs/btree_cache.c b/libbcachefs/btree_cache.c index d24827fb..b5e78042 100644 --- a/libbcachefs/btree_cache.c +++ b/libbcachefs/btree_cache.c @@ -577,7 +577,7 @@ int bch2_btree_cache_cannibalize_lock(struct bch_fs *c, struct closure *cl) } trace_and_count(c, btree_cache_cannibalize_lock_fail, c); - return -EAGAIN; + return -BCH_ERR_btree_cache_cannibalize_lock_blocked; success: trace_and_count(c, btree_cache_cannibalize_lock, c); @@ -952,8 +952,6 @@ retry: * bch_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * - * If IO is necessary and running under generic_make_request, returns -EAGAIN. - * * The btree node will have either a read or a write lock held, depending on * the @write parameter. 
*/ diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 055987a2..6b7353c9 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1285,8 +1285,7 @@ fsck_err: return ret; } -static int bch2_gc_start(struct bch_fs *c, - bool metadata_only) +static int bch2_gc_start(struct bch_fs *c) { struct bch_dev *ca = NULL; unsigned i; @@ -1301,7 +1300,6 @@ static int bch2_gc_start(struct bch_fs *c, } for_each_member_device(ca, c, i) { - BUG_ON(ca->buckets_gc); BUG_ON(ca->usage_gc); ca->usage_gc = alloc_percpu(struct bch_dev_usage); @@ -1318,6 +1316,22 @@ static int bch2_gc_start(struct bch_fs *c, return 0; } +static int bch2_gc_reset(struct bch_fs *c) +{ + struct bch_dev *ca; + unsigned i; + + for_each_member_device(ca, c, i) { + free_percpu(ca->usage_gc); + ca->usage_gc = NULL; + } + + free_percpu(c->usage_gc); + c->usage_gc = NULL; + + return bch2_gc_start(c); +} + /* returns true if not equal */ static inline bool bch2_alloc_v4_cmp(struct bch_alloc_v4 l, struct bch_alloc_v4 r) @@ -1763,7 +1777,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only) bch2_btree_interior_updates_flush(c); - ret = bch2_gc_start(c, metadata_only) ?: + ret = bch2_gc_start(c) ?: bch2_gc_alloc_start(c, metadata_only) ?: bch2_gc_reflink_start(c, metadata_only); if (ret) @@ -1824,6 +1838,9 @@ again: bch2_gc_stripes_reset(c, metadata_only); bch2_gc_alloc_reset(c, metadata_only); bch2_gc_reflink_reset(c, metadata_only); + ret = bch2_gc_reset(c); + if (ret) + goto out; /* flush fsck errors, reset counters */ bch2_flush_fsck_errs(c); @@ -1975,7 +1992,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BTREE_INSERT_NOFAIL, gc_btree_gens_key(&trans, &iter, k)); - if (ret && ret != -EROFS) + if (ret && !bch2_err_matches(ret, EROFS)) bch_err(c, "error recalculating oldest_gen: %s", bch2_err_str(ret)); if (ret) goto err; @@ -1988,7 +2005,7 @@ int bch2_gc_gens(struct bch_fs *c) NULL, NULL, BTREE_INSERT_NOFAIL, bch2_alloc_write_oldest_gen(&trans, &iter, k)); - if (ret && ret != -EROFS) + if (ret && !bch2_err_matches(ret, EROFS)) bch_err(c, "error writing oldest_gen: %s", bch2_err_str(ret)); if (ret) goto err; diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index f9ccc216..9c139a7b 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -2077,6 +2077,11 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e iter->update_path, pos, iter->flags & BTREE_ITER_INTENT, _THIS_IP_); + ret = bch2_btree_path_traverse(trans, iter->update_path, iter->flags); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } } /* @@ -2776,6 +2781,20 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) return p; } +static noinline void bch2_trans_reset_srcu_lock(struct btree_trans *trans) +{ + struct bch_fs *c = trans->c; + struct btree_path *path; + + trans_for_each_path(trans, path) + if (path->cached && !btree_node_locked(path, 0)) + path->l[0].b = ERR_PTR(-BCH_ERR_no_btree_node_srcu_reset); + + srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx); + trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; +} + /** * bch2_trans_begin() - reset a transaction after a interrupted attempt * @trans: transaction to reset @@ -2831,6 +2850,9 @@ u32 bch2_trans_begin(struct btree_trans *trans) bch2_trans_relock(trans); } + if (unlikely(time_after(jiffies, trans->srcu_lock_time + HZ))) + bch2_trans_reset_srcu_lock(trans); + trans->last_restarted_ip = _RET_IP_; if (trans->restarted) 
bch2_btree_path_traverse_all(trans); @@ -2920,6 +2942,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_ trans->nr_max_paths = s->nr_max_paths; trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier); + trans->srcu_lock_time = jiffies; mutex_lock(&c->btree_trans_lock); list_for_each_entry(pos, &c->btree_trans_list, list) { diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index 2f13be60..07c415d5 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -459,7 +459,7 @@ static inline struct bkey_i *bch2_bkey_get_mut(struct btree_trans *trans, #define bch2_bkey_alloc(_trans, _iter, _type) \ ({ \ - struct bkey_i_##_type *_k = bch2_trans_kmalloc(_trans, sizeof(*_k));\ + struct bkey_i_##_type *_k = bch2_trans_kmalloc_nomemzero(_trans, sizeof(*_k));\ if (!IS_ERR(_k)) { \ bkey_##_type##_init(&_k->k_i); \ _k->k.p = (_iter)->pos; \ diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h index 07c509aa..af86ba12 100644 --- a/libbcachefs/btree_types.h +++ b/libbcachefs/btree_types.h @@ -233,7 +233,7 @@ struct btree_path { /* btree_iter_copy starts here: */ struct bpos pos; - enum btree_id btree_id:4; + enum btree_id btree_id:5; bool cached:1; bool preserve:1; enum btree_path_uptodate uptodate:2; @@ -243,7 +243,7 @@ struct btree_path { */ bool should_be_locked:1; unsigned level:3, - locks_want:4; + locks_want:3; u8 nodes_locked; struct btree_path_level { @@ -277,7 +277,7 @@ struct btree_iter { struct btree_path *update_path; struct btree_path *key_cache_path; - enum btree_id btree_id:4; + enum btree_id btree_id:8; unsigned min_depth:3; unsigned advanced:1; @@ -421,6 +421,7 @@ struct btree_trans { enum bch_errcode restarted:16; u32 restart_count; unsigned long last_restarted_ip; + unsigned long srcu_lock_time; /* * For when bch2_trans_update notices we'll be splitting a compressed @@ -442,7 +443,7 @@ struct btree_trans { /* update path: */ struct btree_trans_commit_hook *hooks; - DARRAY(u64) extra_journal_entries; + darray_u64 extra_journal_entries; struct journal_entry_pin *journal_pin; struct journal_res journal_res; @@ -702,15 +703,6 @@ struct btree_root { s8 error; }; -enum btree_insert_ret { - BTREE_INSERT_OK, - /* leaf node needs to be split */ - BTREE_INSERT_BTREE_NODE_FULL, - BTREE_INSERT_NEED_MARK_REPLICAS, - BTREE_INSERT_NEED_JOURNAL_RES, - BTREE_INSERT_NEED_JOURNAL_RECLAIM, -}; - enum btree_gc_coalesce_fail_reason { BTREE_GC_COALESCE_FAIL_RESERVE_GET, BTREE_GC_COALESCE_FAIL_KEYLIST_REALLOC, diff --git a/libbcachefs/btree_update.h b/libbcachefs/btree_update.h index 1c2e7b2b..7e9f1f17 100644 --- a/libbcachefs/btree_update.h +++ b/libbcachefs/btree_update.h @@ -82,7 +82,8 @@ void bch2_trans_commit_hook(struct btree_trans *, struct btree_trans_commit_hook *); int __bch2_trans_commit(struct btree_trans *); -int bch2_trans_log_msg(struct btree_trans *, const char *); +int bch2_trans_log_msg(struct btree_trans *, const char *, ...); +int bch2_fs_log_msg(struct bch_fs *, const char *, ...); /** * bch2_trans_commit - insert keys at given iterator positions diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index a4476f16..a49e7b6b 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -1162,7 +1162,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, bch2_trans_unlock(trans); closure_sync(&cl); - } while (ret == -EAGAIN); + } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } if (ret) { diff --git 
a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c index 154a819b..a2b37dd4 100644 --- a/libbcachefs/btree_update_leaf.c +++ b/libbcachefs/btree_update_leaf.c @@ -316,15 +316,10 @@ bch2_trans_journal_preres_get_cold(struct btree_trans *trans, unsigned u64s, static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans, unsigned flags) { - struct bch_fs *c = trans->c; - int ret; - - ret = bch2_journal_res_get(&c->journal, &trans->journal_res, - trans->journal_u64s, - flags| - (trans->flags & JOURNAL_WATERMARK_MASK)); - - return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret; + return bch2_journal_res_get(&trans->c->journal, &trans->journal_res, + trans->journal_u64s, + flags| + (trans->flags & JOURNAL_WATERMARK_MASK)); } #define JSET_ENTRY_LOG_U64s 4 @@ -343,23 +338,20 @@ static void journal_transaction_name(struct btree_trans *trans) strncpy(l->d, trans->fn, JSET_ENTRY_LOG_U64s * sizeof(u64)); } -static inline enum btree_insert_ret -btree_key_can_insert(struct btree_trans *trans, - struct btree *b, - unsigned u64s) +static inline int btree_key_can_insert(struct btree_trans *trans, + struct btree *b, unsigned u64s) { struct bch_fs *c = trans->c; if (!bch2_btree_node_insert_fits(c, b, u64s)) - return BTREE_INSERT_BTREE_NODE_FULL; + return -BCH_ERR_btree_insert_btree_node_full; - return BTREE_INSERT_OK; + return 0; } -static enum btree_insert_ret -btree_key_can_insert_cached(struct btree_trans *trans, - struct btree_path *path, - unsigned u64s) +static int btree_key_can_insert_cached(struct btree_trans *trans, + struct btree_path *path, + unsigned u64s) { struct bch_fs *c = trans->c; struct bkey_cached *ck = (void *) path->l[0].b; @@ -372,7 +364,7 @@ btree_key_can_insert_cached(struct btree_trans *trans, if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) && bch2_btree_key_cache_must_wait(c) && !(trans->flags & BTREE_INSERT_JOURNAL_RECLAIM)) - return BTREE_INSERT_NEED_JOURNAL_RECLAIM; + return -BCH_ERR_btree_insert_need_journal_reclaim; /* * bch2_varint_decode can read past the end of the buffer by at most 7 @@ -381,7 +373,7 @@ btree_key_can_insert_cached(struct btree_trans *trans, u64s += 1; if (u64s <= ck->u64s) - return BTREE_INSERT_OK; + return 0; new_u64s = roundup_pow_of_two(u64s); new_k = krealloc(ck->k, new_u64s * sizeof(u64), GFP_NOFS); @@ -646,21 +638,13 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, trans->journal_res.seq = c->journal.replay_journal_seq; } - if (unlikely(trans->extra_journal_entries.nr)) { - memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), - trans->extra_journal_entries.data, - trans->extra_journal_entries.nr); - - trans->journal_res.offset += trans->extra_journal_entries.nr; - trans->journal_res.u64s -= trans->extra_journal_entries.nr; - } - /* * Not allowed to fail after we've gotten our journal reservation - we * have to use it: */ - if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && + !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) { if (bch2_journal_seq_verify) trans_for_each_update(trans, i) i->k->k.version.lo = trans->journal_res.seq; @@ -671,7 +655,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, if (trans->fs_usage_deltas && bch2_trans_fs_usage_apply(trans, trans->fs_usage_deltas)) - return BTREE_INSERT_NEED_MARK_REPLICAS; + return -BCH_ERR_btree_insert_need_mark_replicas; trans_for_each_update(trans, i) if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) { @@ -686,6 +670,15 @@ bch2_trans_commit_write_locked(struct 
btree_trans *trans, return ret; } + if (unlikely(trans->extra_journal_entries.nr)) { + memcpy_u64s_small(journal_res_entry(&c->journal, &trans->journal_res), + trans->extra_journal_entries.data, + trans->extra_journal_entries.nr); + + trans->journal_res.offset += trans->extra_journal_entries.nr; + trans->journal_res.u64s -= trans->extra_journal_entries.nr; + } + if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) { trans_for_each_update(trans, i) { struct journal *j = &c->journal; @@ -844,7 +837,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, &trans->journal_preres, trans->journal_preres_u64s, JOURNAL_RES_GET_NONBLOCK| (trans->flags & JOURNAL_WATERMARK_MASK)); - if (unlikely(ret == -EAGAIN)) + if (unlikely(ret == -BCH_ERR_journal_preres_get_blocked)) ret = bch2_trans_journal_preres_get_cold(trans, trans->journal_preres_u64s, trace_ip); if (unlikely(ret)) @@ -900,12 +893,12 @@ int bch2_trans_commit_error(struct btree_trans *trans, struct bch_fs *c = trans->c; switch (ret) { - case BTREE_INSERT_BTREE_NODE_FULL: + case -BCH_ERR_btree_insert_btree_node_full: ret = bch2_btree_split_leaf(trans, i->path, trans->flags); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) trace_and_count(c, trans_restart_btree_node_split, trans, trace_ip, i->path); break; - case BTREE_INSERT_NEED_MARK_REPLICAS: + case -BCH_ERR_btree_insert_need_mark_replicas: bch2_trans_unlock(trans); ret = bch2_replicas_delta_list_mark(c, trans->fs_usage_deltas); @@ -916,7 +909,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) trace_and_count(c, trans_restart_mark_replicas, trans, trace_ip); break; - case BTREE_INSERT_NEED_JOURNAL_RES: + case -BCH_ERR_journal_res_get_blocked: bch2_trans_unlock(trans); if ((trans->flags & BTREE_INSERT_JOURNAL_RECLAIM) && @@ -933,7 +926,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, if (ret) trace_and_count(c, trans_restart_journal_res_get, trans, trace_ip); break; - case BTREE_INSERT_NEED_JOURNAL_RECLAIM: + case -BCH_ERR_btree_insert_need_journal_reclaim: bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); @@ -970,7 +963,7 @@ bch2_trans_commit_get_rw_cold(struct btree_trans *trans) if (likely(!(trans->flags & BTREE_INSERT_LAZY_RW)) || test_bit(BCH_FS_STARTED, &c->flags)) - return -EROFS; + return -BCH_ERR_erofs_trans_commit; bch2_trans_unlock(trans); @@ -1734,18 +1727,25 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id, return ret; } -int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) +static int __bch2_trans_log_msg(darray_u64 *entries, const char *fmt, va_list args) { - unsigned len = strlen(msg); - unsigned u64s = DIV_ROUND_UP(len, sizeof(u64)); + struct printbuf buf = PRINTBUF; struct jset_entry_log *l; + unsigned u64s; int ret; - ret = darray_make_room(&trans->extra_journal_entries, jset_u64s(u64s)); + prt_vprintf(&buf, fmt, args); + ret = buf.allocation_failure ? 
-ENOMEM : 0; if (ret) - return ret; + goto err; - l = (void *) &darray_top(trans->extra_journal_entries); + u64s = DIV_ROUND_UP(buf.pos, sizeof(u64)); + + ret = darray_make_room(entries, jset_u64s(u64s)); + if (ret) + goto err; + + l = (void *) &darray_top(*entries); l->entry.u64s = cpu_to_le16(u64s); l->entry.btree_id = 0; l->entry.level = 1; @@ -1753,10 +1753,44 @@ int bch2_trans_log_msg(struct btree_trans *trans, const char *msg) l->entry.pad[0] = 0; l->entry.pad[1] = 0; l->entry.pad[2] = 0; - memcpy(l->d, msg, len); - while (len & 7) - l->d[len++] = '\0'; + memcpy(l->d, buf.buf, buf.pos); + while (buf.pos & 7) + l->d[buf.pos++] = '\0'; + + entries->nr += jset_u64s(u64s); +err: + printbuf_exit(&buf); + return ret; +} + +int bch2_trans_log_msg(struct btree_trans *trans, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + ret = __bch2_trans_log_msg(&trans->extra_journal_entries, fmt, args); + va_end(args); + + return ret; +} + +int bch2_fs_log_msg(struct bch_fs *c, const char *fmt, ...) +{ + va_list args; + int ret; + + va_start(args, fmt); + + if (!test_bit(JOURNAL_STARTED, &c->journal.flags)) { + ret = __bch2_trans_log_msg(&c->journal.early_journal_entries, fmt, args); + } else { + ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_LAZY_RW, + __bch2_trans_log_msg(&trans.extra_journal_entries, fmt, args)); + } + + va_end(args); + + return ret; - trans->extra_journal_entries.nr += jset_u64s(u64s); - return 0; } diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 3edd7b77..7ef7bb61 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -11,6 +11,7 @@ #include "io.h" #include "keylist.h" #include "move.h" +#include "nocow_locking.h" #include "subvolume.h" #include <trace/events/bcachefs.h> @@ -349,7 +350,7 @@ void bch2_update_unwritten_extent(struct btree_trans *trans, update->op.nr_replicas, update->op.alloc_reserve, 0, &cl, &wp); - if (ret == -EAGAIN) { + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { bch2_trans_unlock(trans); closure_sync(&cl); continue; @@ -459,7 +460,7 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m, ? 
0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - return ret; + goto err; } m->op.nr_replicas = m->op.nr_replicas_required = @@ -471,6 +472,14 @@ int bch2_data_update_init(struct bch_fs *c, struct data_update *m, if (bkey_extent_is_unwritten(k)) return -BCH_ERR_unwritten_extent_update; return 0; +err: + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + bch2_bucket_nocow_unlock(&c->nocow_locks, + PTR_BUCKET_POS(c, &p.ptr), 0); + + bch2_bkey_buf_exit(&m->k, c); + bch2_bio_free_pages_pool(c, &m->op.wbio.bio); + return ret; } void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts) diff --git a/libbcachefs/ec.c b/libbcachefs/ec.c index 53f0d820..c234c8d5 100644 --- a/libbcachefs/ec.c +++ b/libbcachefs/ec.c @@ -942,7 +942,7 @@ static void ec_stripe_create(struct ec_stripe_new *s) closure_sync(&s->iodone); if (s->err) { - if (s->err != -EROFS) + if (!bch2_err_matches(s->err, EROFS)) bch_err(c, "error creating stripe: error writing data buckets"); goto err; } diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h index 4942c367..62170964 100644 --- a/libbcachefs/errcode.h +++ b/libbcachefs/errcode.h @@ -20,7 +20,6 @@ x(0, open_buckets_empty) \ x(0, freelist_empty) \ x(BCH_ERR_freelist_empty, no_buckets_found) \ - x(0, insufficient_devices) \ x(0, transaction_restart) \ x(BCH_ERR_transaction_restart, transaction_restart_fault_inject) \ x(BCH_ERR_transaction_restart, transaction_restart_relock) \ @@ -53,6 +52,12 @@ x(BCH_ERR_no_btree_node, no_btree_node_down) \ x(BCH_ERR_no_btree_node, no_btree_node_init) \ x(BCH_ERR_no_btree_node, no_btree_node_cached) \ + x(BCH_ERR_no_btree_node, no_btree_node_srcu_reset) \ + x(0, btree_insert_fail) \ + x(BCH_ERR_btree_insert_fail, btree_insert_btree_node_full) \ + x(BCH_ERR_btree_insert_fail, btree_insert_need_mark_replicas) \ + x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_res) \ + x(BCH_ERR_btree_insert_fail, btree_insert_need_journal_reclaim) \ x(0, backpointer_to_overwritten_btree_node) \ x(0, lock_fail_root_changed) \ x(0, journal_reclaim_would_deadlock) \ @@ -76,6 +81,16 @@ x(EINVAL, device_already_online) \ x(EINVAL, insufficient_devices_to_start) \ x(EINVAL, invalid) \ + x(EROFS, erofs_trans_commit) \ + x(EROFS, erofs_no_writes) \ + x(EROFS, erofs_journal_err) \ + x(EROFS, erofs_sb_err) \ + x(EROFS, insufficient_devices) \ + x(0, operation_blocked) \ + x(BCH_ERR_operation_blocked, btree_cache_cannibalize_lock_blocked) \ + x(BCH_ERR_operation_blocked, journal_res_get_blocked) \ + x(BCH_ERR_operation_blocked, journal_preres_get_blocked) \ + x(BCH_ERR_operation_blocked, bucket_alloc_blocked) \ x(BCH_ERR_invalid, invalid_sb) \ x(BCH_ERR_invalid_sb, invalid_sb_magic) \ x(BCH_ERR_invalid_sb, invalid_sb_version) \ diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c index 938c7b43..585d16ac 100644 --- a/libbcachefs/inode.c +++ b/libbcachefs/inode.c @@ -269,6 +269,8 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k, static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k, struct bch_inode_unpacked *unpacked) { + memset(unpacked, 0, sizeof(*unpacked)); + switch (k.k->type) { case KEY_TYPE_inode: { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); diff --git a/libbcachefs/io.c b/libbcachefs/io.c index f0fca861..d215973a 100644 --- a/libbcachefs/io.c +++ b/libbcachefs/io.c @@ -27,6 +27,7 @@ #include "journal.h" #include "keylist.h" #include "move.h" +#include "nocow_locking.h" #include "rebalance.h" #include "subvolume.h" #include "super.h" @@ -427,7 +428,7 @@ retry: opts.data_replicas, opts.data_replicas, 
RESERVE_none, 0, &cl, &wp); - if (ret == -EAGAIN) { + if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) { bch2_trans_unlock(trans); closure_sync(&cl); goto retry; @@ -753,15 +754,17 @@ static void __bch2_write_index(struct bch_write_op *op) op->written += sectors_start - keylist_sectors(keys); - if (ret) { + if (ret && !bch2_err_matches(ret, EROFS)) { struct bkey_i *k = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, k->k.p.inode, k->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); - goto err; } + + if (ret) + goto err; } out: /* If some a bucket wasn't written, we can't erasure code it: */ @@ -1362,13 +1365,16 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) bch2_nocow_write_convert_one_unwritten(&trans, &iter, orig, k, op->new_i_size); })); - if (ret) { + if (ret && !bch2_err_matches(ret, EROFS)) { struct bkey_i *k = bch2_keylist_front(&op->insert_keys); bch_err_inum_offset_ratelimited(c, k->k.p.inode, k->k.p.offset << 9, "write error while doing btree update: %s", bch2_err_str(ret)); + } + + if (ret) { op->error = ret; break; } @@ -1406,7 +1412,7 @@ static void bch2_nocow_write(struct bch_write_op *op) struct { struct bpos b; unsigned gen; - two_state_lock_t *l; + struct nocow_lock_bucket *l; } buckets[BCH_REPLICAS_MAX]; unsigned nr_buckets = 0; u32 snapshot; @@ -1453,7 +1459,8 @@ retry: buckets[nr_buckets].b = PTR_BUCKET_POS(c, ptr); buckets[nr_buckets].gen = ptr->gen; buckets[nr_buckets].l = - bucket_nocow_lock(&c->nocow_locks, buckets[nr_buckets].b); + bucket_nocow_lock(&c->nocow_locks, + bucket_to_u64(buckets[nr_buckets].b)); prefetch(buckets[nr_buckets].l); nr_buckets++; @@ -1475,11 +1482,12 @@ retry: for (i = 0; i < nr_buckets; i++) { struct bch_dev *ca = bch_dev_bkey_exists(c, buckets[i].b.inode); - two_state_lock_t *l = buckets[i].l; + struct nocow_lock_bucket *l = buckets[i].l; bool stale; - if (!bch2_two_state_trylock(l, BUCKET_NOCOW_LOCK_UPDATE)) - __bch2_bucket_nocow_lock(&c->nocow_locks, l, BUCKET_NOCOW_LOCK_UPDATE); + __bch2_bucket_nocow_lock(&c->nocow_locks, l, + bucket_to_u64(buckets[i].b), + BUCKET_NOCOW_LOCK_UPDATE); rcu_read_lock(); stale = gen_after(*bucket_gen(ca, buckets[i].b.offset), buckets[i].gen); @@ -1627,7 +1635,7 @@ again: BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? 
			NULL : &op->cl, &wp));
 		if (unlikely(ret)) {
-			if (ret == -EAGAIN)
+			if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
 				break;
 			goto err;
@@ -1775,7 +1783,7 @@ void bch2_write(struct closure *cl)
 	if (c->opts.nochanges ||
 	    !percpu_ref_tryget_live(&c->writes)) {
-		op->error = -EROFS;
+		op->error = -BCH_ERR_erofs_no_writes;
 		goto err;
 	}
@@ -2905,11 +2913,6 @@ void bch2_fs_io_exit(struct bch_fs *c)
 
 int bch2_fs_io_init(struct bch_fs *c)
 {
-	unsigned i;
-
-	for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++)
-		two_state_lock_init(&c->nocow_locks.l[i]);
-
 	if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
			BIOSET_NEED_BVECS) ||
 	    bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
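The io.c conversions above lean on the private error codes added in errcode.h: a call site now stores a specific code such as -BCH_ERR_erofs_no_writes or -BCH_ERR_journal_res_get_blocked, and callers test which class it belongs to with bch2_err_matches() instead of comparing against a bare -EROFS or -EAGAIN. A minimal standalone sketch of the parent-chain idea follows; the table and names here are made up for illustration and are not the bcachefs macros.

#include <errno.h>
#include <stdio.h>

/* Simplified illustration of class-matching error codes; each private code
 * records its parent, and chains bottom out in an ordinary errno value. */
enum {
	ERR_START = 2048,
	ERR_operation_blocked,		/* parent: 0 (its own class)      */
	ERR_journal_res_get_blocked,	/* parent: ERR_operation_blocked  */
	ERR_erofs_no_writes,		/* parent: EROFS                  */
};

static const int err_parent[] = {
	[ERR_operation_blocked       - ERR_START] = 0,
	[ERR_journal_res_get_blocked - ERR_START] = ERR_operation_blocked,
	[ERR_erofs_no_writes         - ERR_START] = EROFS,
};

/* Walk the parent chain: does err belong to the class named by class? */
static int err_matches(int err, int class)
{
	err   = err   < 0 ? -err   : err;
	class = class < 0 ? -class : class;

	while (err >= ERR_START) {
		if (err == class)
			return 1;
		err = err_parent[err - ERR_START];
	}
	return err == class;
}

int main(void)
{
	printf("%d\n", err_matches(-ERR_erofs_no_writes, -EROFS));	/* 1 */
	printf("%d\n", err_matches(-ERR_journal_res_get_blocked,
				   -ERR_operation_blocked));		/* 1 */
	printf("%d\n", err_matches(-ERR_erofs_no_writes, -EAGAIN));	/* 0 */
	return 0;
}

The payoff, visible throughout this diff, is that the specific cause survives up the call stack for error messages while existing "is this a read-only error / a blocking condition" checks keep working.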
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 95c29229..1cbca187 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -199,12 +199,6 @@ static bool journal_entry_close(struct journal *j)
 /*
  * should _only_ called from journal_res_get() - when we actually want a
  * journal reservation - journal entry is open means journal is dirty:
- *
- * returns:
- * 0:		success
- * -ENOSPC:	journal currently full, must invoke reclaim
- * -EAGAIN:	journal blocked, must wait
- * -EROFS:	insufficient rw devices or journal error
  */
 static int journal_entry_open(struct journal *j)
 {
@@ -250,7 +244,7 @@ static int journal_entry_open(struct journal *j)
 		journal_entry_overhead(j);
 	u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
-	if (u64s <= 0)
+	if (u64s <= (ssize_t) j->early_journal_entries.nr)
 		return JOURNAL_ERR_journal_full;
 	if (fifo_empty(&j->pin) && j->reclaim_thread)
@@ -275,6 +269,12 @@ static int journal_entry_open(struct journal *j)
 	buf->data->seq	= cpu_to_le64(journal_cur_seq(j));
 	buf->data->u64s	= 0;
+	if (j->early_journal_entries.nr) {
+		memcpy(buf->data->_data, j->early_journal_entries.data,
+		       j->early_journal_entries.nr * sizeof(u64));
+		le32_add_cpu(&buf->data->u64s, j->early_journal_entries.nr);
+	}
+
 	/*
 	 * Must be set before marking the journal entry as open:
 	 */
@@ -291,7 +291,9 @@ static int journal_entry_open(struct journal *j)
 		BUG_ON(new.idx != (journal_cur_seq(j) & JOURNAL_BUF_MASK));
 		journal_state_inc(&new);
-		new.cur_entry_offset = 0;
+
+		/* Handle any already added entries */
+		new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
 	} while ((v = atomic64_cmpxchg(&j->reservations.counter,
 				       old.v, new.v)) != old.v);
@@ -304,6 +306,9 @@ static int journal_entry_open(struct journal *j)
 			   &j->write_work,
 			   msecs_to_jiffies(c->opts.journal_flush_delay));
 	journal_wake(j);
+
+	if (j->early_journal_entries.nr)
+		darray_exit(&j->early_journal_entries);
 	return 0;
 }
@@ -353,7 +358,7 @@ retry:
 		return 0;
 	if (bch2_journal_error(j))
-		return -EROFS;
+		return -BCH_ERR_erofs_journal_err;
 	spin_lock(&j->lock);
@@ -445,7 +450,9 @@ unlock:
 		}
 	}
-	return ret == JOURNAL_ERR_insufficient_devices ? -EROFS : -EAGAIN;
+	return ret == JOURNAL_ERR_insufficient_devices
+		? -EROFS
+		: -BCH_ERR_journal_res_get_blocked;
 }
 /*
@@ -464,7 +471,8 @@ int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
 	int ret;
 	closure_wait_event(&j->async_wait,
-		   (ret = __journal_res_get(j, res, flags)) != -EAGAIN ||
+		   (ret = __journal_res_get(j, res, flags)) !=
+		   -BCH_ERR_journal_res_get_blocked||
 		   (flags & JOURNAL_RES_GET_NONBLOCK));
 	return ret;
 }
@@ -720,39 +728,6 @@ int bch2_journal_meta(struct journal *j)
 	return bch2_journal_flush_seq(j, res.seq);
 }
-int bch2_journal_log_msg(struct journal *j, const char *fmt, ...)
-{
-	struct jset_entry_log *entry;
-	struct journal_res res = { 0 };
-	unsigned msglen, u64s;
-	va_list args;
-	int ret;
-
-	va_start(args, fmt);
-	msglen = vsnprintf(NULL, 0, fmt, args) + 1;
-	va_end(args);
-
-	u64s = jset_u64s(DIV_ROUND_UP(msglen, sizeof(u64)));
-
-	ret = bch2_journal_res_get(j, &res, u64s, 0);
-	if (ret)
-		return ret;
-
-	entry = container_of(journal_res_entry(j, &res),
-			     struct jset_entry_log, entry);
-	memset(entry, 0, u64s * sizeof(u64));
-	entry->entry.type = BCH_JSET_ENTRY_log;
-	entry->entry.u64s = u64s - 1;
-
-	va_start(args, fmt);
-	vsnprintf(entry->d, INT_MAX, fmt, args);
-	va_end(args);
-
-	bch2_journal_res_put(j, &res);
-
-	return bch2_journal_flush_seq(j, res.seq);
-}
-
 /* block/unlock the journal: */
 void bch2_journal_unblock(struct journal *j)
@@ -815,12 +790,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 		} else {
 			ob[nr_got] = bch2_bucket_alloc(c, ca, RESERVE_none, false, cl);
-			if (IS_ERR(ob[nr_got])) {
-				ret = cl
-					? -EAGAIN
-					: -BCH_ERR_ENOSPC_bucket_alloc;
+			ret = PTR_ERR_OR_ZERO(ob[nr_got]);
+			if (ret)
 				break;
-			}
 			bu[nr_got] = ob[nr_got]->bucket;
 		}
@@ -930,7 +902,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
 	closure_init_stack(&cl);
-	while (ja->nr != nr && (ret == 0 || ret == -EAGAIN)) {
+	while (ja->nr != nr && (ret == 0 || ret == -BCH_ERR_bucket_alloc_blocked)) {
 		struct disk_reservation disk_res = { 0, 0 };
 		closure_sync(&cl);
@@ -1198,6 +1170,8 @@ void bch2_fs_journal_exit(struct journal *j)
 {
 	unsigned i;
+	darray_exit(&j->early_journal_entries);
+
 	for (i = 0; i < ARRAY_SIZE(j->buf); i++)
 		kvpfree(j->buf[i].data, j->buf[i].buf_size);
 	free_fifo(&j->pin);
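With bch2_journal_log_msg() gone, log messages go through bch2_fs_log_msg(), which can be called before the journal is up: the formatted payload is pushed onto j->early_journal_entries, and journal_entry_open() above copies that backlog into the first entry it opens and then frees it. A rough standalone sketch of the stash-then-drain idea; names and the growable array here are hypothetical, not the bcachefs darray API.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A tiny growable array of u64 words standing in for early_journal_entries. */
struct u64_stash {
	uint64_t	*data;
	size_t		nr, capacity;
};

static int stash_append(struct u64_stash *s, const void *buf, size_t bytes)
{
	size_t u64s = (bytes + sizeof(uint64_t) - 1) / sizeof(uint64_t);

	if (s->nr + u64s > s->capacity) {
		size_t new_cap = (s->nr + u64s) * 2;
		uint64_t *d = realloc(s->data, new_cap * sizeof(*d));

		if (!d)
			return -1;
		s->data		= d;
		s->capacity	= new_cap;
	}

	/* zero the padding so the tail of the last word is deterministic */
	memset(&s->data[s->nr], 0, u64s * sizeof(uint64_t));
	memcpy(&s->data[s->nr], buf, bytes);
	s->nr += u64s;
	return 0;
}

/* "Opening" the first journal entry: drain the stash into it, then drop it. */
static size_t stash_drain(struct u64_stash *s, uint64_t *entry, size_t max_u64s)
{
	size_t n = s->nr < max_u64s ? s->nr : max_u64s;

	memcpy(entry, s->data, n * sizeof(uint64_t));
	free(s->data);
	memset(s, 0, sizeof(*s));
	return n;
}

int main(void)
{
	struct u64_stash stash = { 0 };
	uint64_t entry[64];
	const char *msg = "starting journal replay";

	stash_append(&stash, msg, strlen(msg) + 1);
	printf("drained %zu u64s\n", stash_drain(&stash, entry, 64));
	return 0;
}

This is why journal_entry_open() now requires u64s to exceed early_journal_entries.nr before it will open an entry: the backlog has to fit in front of whatever the caller wants to reserve.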
diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h
index 51d29a01..ee37f90a 100644
--- a/libbcachefs/journal.h
+++ b/libbcachefs/journal.h
@@ -479,7 +479,7 @@ static inline int bch2_journal_preres_get(struct journal *j,
 		return 0;
 	if (flags & JOURNAL_RES_GET_NONBLOCK)
-		return -EAGAIN;
+		return -BCH_ERR_journal_preres_get_blocked;
 	return __bch2_journal_preres_get(j, res, new_u64s, flags);
 }
@@ -497,7 +497,6 @@ int bch2_journal_flush_seq(struct journal *, u64);
 int bch2_journal_flush(struct journal *);
 bool bch2_journal_noflush_seq(struct journal *, u64);
 int bch2_journal_meta(struct journal *);
-int bch2_journal_log_msg(struct journal *, const char *, ...);
 void bch2_journal_halt(struct journal *);
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 2b1974a9..d6f25934 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1080,7 +1080,10 @@ void bch2_journal_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 	}
 }
-int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
+int bch2_journal_read(struct bch_fs *c,
+		      u64 *last_seq,
+		      u64 *blacklist_seq,
+		      u64 *start_seq)
 {
 	struct journal_list jlist;
 	struct journal_replay *i, **_i, *prev = NULL;
@@ -1089,7 +1092,7 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 	unsigned iter;
 	struct printbuf buf = PRINTBUF;
 	bool degraded = false, last_write_torn = false;
-	u64 seq, last_seq = 0;
+	u64 seq;
 	int ret = 0;
 	closure_init_stack(&jlist.cl);
@@ -1118,15 +1121,13 @@ int bch2_journal_read(struct bch_fs *c, u64 *blacklist_seq, u64 *start_seq)
 	if (jlist.ret)
 		return jlist.ret;
+	*last_seq	= 0;
 	*start_seq	= 0;
 	*blacklist_seq	= 0;
 	/*
 	 * Find most recent flush entry, and ignore newer non flush entries -
 	 * those entries will be blacklisted:
-	 *
-	 *
-	 * XXX check for torn write on last journal entry
 	 */
 	genradix_for_each_reverse(&c->journal_entries, radix_iter, _i) {
 		int write = READ;
@@ -1140,13 +1141,13 @@
 			*blacklist_seq = *start_seq = le64_to_cpu(i->j.seq) + 1;
 		if (JSET_NO_FLUSH(&i->j)) {
-			journal_replay_free(c, i);
+			i->ignore = true;
 			continue;
 		}
 		if (!last_write_torn && !i->csum_good) {
 			last_write_torn = true;
-			journal_replay_free(c, i);
+			i->ignore = true;
 			continue;
 		}
@@ -1157,7 +1158,7 @@
 				le64_to_cpu(i->j.seq)))
 			i->j.last_seq = i->j.seq;
-		last_seq	= le64_to_cpu(i->j.last_seq);
+		*last_seq	= le64_to_cpu(i->j.last_seq);
 		*blacklist_seq	= le64_to_cpu(i->j.seq) + 1;
 		break;
 	}
@@ -1167,13 +1168,13 @@
 		return 0;
 	}
-	if (!last_seq) {
+	if (!*last_seq) {
 		fsck_err(c, "journal read done, but no entries found after dropping non-flushes");
 		return 0;
 	}
 	bch_info(c, "journal read done, replaying entries %llu-%llu",
-		 last_seq, *blacklist_seq - 1);
+		 *last_seq, *blacklist_seq - 1);
 	if (*start_seq != *blacklist_seq)
 		bch_info(c, "dropped unflushed entries %llu-%llu",
@@ -1187,7 +1188,7 @@
 			continue;
 		seq = le64_to_cpu(i->j.seq);
-		if (seq < last_seq) {
+		if (seq < *last_seq) {
 			journal_replay_free(c, i);
 			continue;
 		}
@@ -1195,13 +1196,12 @@
 		if (bch2_journal_seq_is_blacklisted(c, seq, true)) {
 			fsck_err_on(!JSET_NO_FLUSH(&i->j), c,
 				    "found blacklisted journal entry %llu", seq);
-
-			journal_replay_free(c, i);
+			i->ignore = true;
 		}
 	}
 	/* Check for missing entries: */
-	seq = last_seq;
+	seq = *last_seq;
 	genradix_for_each(&c->journal_entries, radix_iter, _i) {
 		i = *_i;
@@ -1239,7 +1239,7 @@
 			 " prev at %s\n"
 			 " next at %s",
 			 missing_start, missing_end,
-			 last_seq, *blacklist_seq - 1,
+			 *last_seq, *blacklist_seq - 1,
 			 buf1.buf, buf2.buf);
 		printbuf_exit(&buf1);
diff --git a/libbcachefs/journal_io.h b/libbcachefs/journal_io.h
index 2f8bbf06..a32c2876 100644
--- a/libbcachefs/journal_io.h
+++ b/libbcachefs/journal_io.h
@@ -52,7 +52,7 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *,
 void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *,
 			       struct journal_replay *);
-int bch2_journal_read(struct bch_fs *, u64 *, u64 *);
+int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *);
 void bch2_journal_write(struct closure *);
diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h
index 045ee95a..c8729cb3 100644
--- a/libbcachefs/journal_types.h
+++ b/libbcachefs/journal_types.h
@@ -177,6 +177,8 @@ enum journal_errors {
 #undef x
 };
 
+typedef DARRAY(u64) darray_u64;
+
 /* Embedded in struct bch_fs */
 struct journal {
 	/* Fastpath stuff up front: */
@@ -204,6 +206,12 @@ struct journal {
 	unsigned	buf_size_want;
 
 	/*
+	 * We may queue up some things to be journalled (log messages) before
+	 * the journal has actually started - stash them here:
+	 */
+	darray_u64	early_journal_entries;
+
+	/*
 	 * Two journal entries -- one is currently open for new entries, the
 	 * other is possibly being written out.
*/ diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 380b66c7..e542cd3d 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -8,6 +8,7 @@ #include "lru.h" #include "recovery.h" +/* KEY_TYPE_lru is obsolete: */ int bch2_lru_invalid(const struct bch_fs *c, struct bkey_s_c k, int rw, struct printbuf *err) { @@ -30,101 +31,57 @@ void bch2_lru_to_text(struct printbuf *out, struct bch_fs *c, prt_printf(out, "idx %llu", le64_to_cpu(lru->idx)); } -int bch2_lru_delete(struct btree_trans *trans, u64 id, u64 idx, u64 time, - struct bkey_s_c orig_k) +static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, + u64 dev_bucket, u64 time, unsigned key_type) { struct btree_iter iter; - struct bkey_s_c k; - u64 existing_idx; - struct printbuf buf = PRINTBUF; + struct bkey_i *k; int ret = 0; if (!time) return 0; - bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, - POS(id, time), - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES); - k = bch2_btree_iter_peek_slot(&iter); - ret = bkey_err(k); - if (ret) - goto err; + k = bch2_trans_kmalloc_nomemzero(trans, sizeof(*k)); + ret = PTR_ERR_OR_ZERO(k); + if (unlikely(ret)) + return ret; - if (k.k->type != KEY_TYPE_lru) { - bch2_bkey_val_to_text(&buf, trans->c, orig_k); - bch2_trans_inconsistent(trans, - "pointer to nonexistent lru %llu:%llu\n%s", - id, time, buf.buf); - ret = -EIO; - goto err; - } + bkey_init(&k->k); + k->k.type = key_type; + k->k.p = lru_pos(lru_id, dev_bucket, time); - existing_idx = le64_to_cpu(bkey_s_c_to_lru(k).v->idx); - if (existing_idx != idx) { - bch2_bkey_val_to_text(&buf, trans->c, orig_k); - bch2_trans_inconsistent(trans, - "lru %llu:%llu with wrong backpointer: got %llu, should be %llu\n%s", - id, time, existing_idx, idx, buf.buf); - ret = -EIO; - goto err; - } + EBUG_ON(lru_pos_id(k->k.p) != lru_id); + EBUG_ON(lru_pos_time(k->k.p) != time); + EBUG_ON(k->k.p.offset != dev_bucket); - ret = bch2_btree_delete_at(trans, &iter, 0); -err: + bch2_trans_iter_init(trans, &iter, BTREE_ID_lru, + k->k.p, BTREE_ITER_INTENT); + + ret = bch2_btree_iter_traverse(&iter) ?: + bch2_trans_update(trans, &iter, k, 0); bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); return ret; } -int bch2_lru_set(struct btree_trans *trans, u64 lru_id, u64 idx, u64 *time) +int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) { - struct btree_iter iter; - struct bkey_s_c k; - struct bkey_i_lru *lru; - int ret = 0; - - if (!*time) - return 0; - - for_each_btree_key_norestart(trans, iter, BTREE_ID_lru, - POS(lru_id, *time), - BTREE_ITER_SLOTS| - BTREE_ITER_INTENT| - BTREE_ITER_WITH_UPDATES, k, ret) - if (bkey_deleted(k.k)) - break; - - if (ret) - goto err; - - BUG_ON(iter.pos.inode != lru_id); - *time = iter.pos.offset; - - lru = bch2_bkey_alloc(trans, &iter, lru); - ret = PTR_ERR_OR_ZERO(lru); - if (ret) - goto err; - - lru->v.idx = cpu_to_le64(idx); + return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted); +} - ret = bch2_trans_update(trans, &iter, &lru->k_i, 0); - if (ret) - goto err; -err: - bch2_trans_iter_exit(trans, &iter); - return ret; +int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) +{ + return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set); } -int bch2_lru_change(struct btree_trans *trans, u64 id, u64 idx, - u64 old_time, u64 *new_time, - struct bkey_s_c k) +int bch2_lru_change(struct btree_trans *trans, + u16 lru_id, u64 dev_bucket, + u64 old_time, u64 new_time) { - if (old_time == *new_time) + if (old_time == new_time) return 0; - return 
bch2_lru_delete(trans, id, idx, old_time, k) ?: - bch2_lru_set(trans, id, idx, new_time); + return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?: + bch2_lru_set(trans, lru_id, dev_bucket, new_time); } static int bch2_check_lru_key(struct btree_trans *trans, @@ -138,12 +95,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, const struct bch_alloc_v4 *a; struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; - struct bpos alloc_pos; + struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset); int ret; - alloc_pos = POS(lru_k.k->p.inode, - le64_to_cpu(bkey_s_c_to_lru(lru_k).v->idx)); - if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), c, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) @@ -157,10 +111,12 @@ static int bch2_check_lru_key(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &a_convert); - if (fsck_err_on(a->data_type != BCH_DATA_cached || - a->io_time[READ] != lru_k.k->p.offset, c, - "incorrect lru entry %s\n" + if (fsck_err_on(lru_k.k->type != KEY_TYPE_set || + a->data_type != BCH_DATA_cached || + a->io_time[READ] != lru_pos_time(lru_k.k->p), c, + "incorrect lru entry (time %llu) %s\n" " for %s", + lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) { ret = bch2_btree_delete_at(trans, lru_iter, 0); diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h index 925c29b4..2e22f139 100644 --- a/libbcachefs/lru.h +++ b/libbcachefs/lru.h @@ -2,6 +2,26 @@ #ifndef _BCACHEFS_LRU_H #define _BCACHEFS_LRU_H +#define LRU_TIME_BITS 48 +#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1) + +static inline struct bpos lru_pos(u16 lru_id, u64 dev_bucket, u64 time) +{ + EBUG_ON(time > LRU_TIME_MAX); + + return POS(((u64) lru_id << LRU_TIME_BITS)|time, dev_bucket); +} + +static inline u64 lru_pos_id(struct bpos pos) +{ + return pos.inode >> LRU_TIME_BITS; +} + +static inline u64 lru_pos_time(struct bpos pos) +{ + return pos.inode & ~(~0ULL << LRU_TIME_BITS); +} + int bch2_lru_invalid(const struct bch_fs *, struct bkey_s_c, int, struct printbuf *); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); @@ -10,9 +30,9 @@ void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); .val_to_text = bch2_lru_to_text, \ }) -int bch2_lru_delete(struct btree_trans *, u64, u64, u64, struct bkey_s_c); -int bch2_lru_set(struct btree_trans *, u64, u64, u64 *); -int bch2_lru_change(struct btree_trans *, u64, u64, u64, u64 *, struct bkey_s_c); +int bch2_lru_del(struct btree_trans *, u16, u64, u64); +int bch2_lru_set(struct btree_trans *, u16, u64, u64); +int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); int bch2_check_lrus(struct bch_fs *); diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 47b77b3c..b308354a 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" +#include "alloc_background.h" #include "alloc_foreground.h" #include "backpointers.h" #include "bkey_buf.h" @@ -257,7 +258,7 @@ static int bch2_move_extent(struct btree_trans *trans, } if (!percpu_ref_tryget_live(&c->writes)) - return -EROFS; + return -BCH_ERR_erofs_no_writes; /* * Before memory allocations & taking nocow locks in @@ -661,13 +662,29 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, struct btree_iter iter; struct bkey_buf sk; struct bch_backpointer bp; + struct bch_alloc_v4 a_convert; + const struct bch_alloc_v4 *a; + struct bkey_s_c k; struct 
data_update_opts data_opts; + unsigned dirty_sectors, bucket_size; u64 bp_offset = 0, cur_inum = U64_MAX; int ret = 0; bch2_bkey_buf_init(&sk); bch2_trans_init(&trans, c, 0, 0); + bch2_trans_iter_init(&trans, &iter, BTREE_ID_alloc, + bucket, BTREE_ITER_CACHED); + ret = lockrestart_do(&trans, + bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + bch2_trans_iter_exit(&trans, &iter); + + if (!ret) { + a = bch2_alloc_to_v4(k, &a_convert); + dirty_sectors = a->dirty_sectors; + bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size; + } + while (!(ret = move_ratelimit(&trans, ctxt))) { bch2_trans_begin(&trans); @@ -765,6 +782,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, bp_offset++; } + trace_evacuate_bucket(c, &bucket, dirty_sectors, bucket_size, ret); + if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) && gen >= 0) { bch2_trans_unlock(&trans); move_ctxt_wait_event(ctxt, NULL, list_empty(&ctxt->reads)); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index fbc8043e..f0ab65ff 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -163,7 +163,7 @@ static int bch2_copygc(struct bch_fs *c) bch2_moving_ctxt_exit(&ctxt); - if (ret < 0 && ret != -EROFS) + if (ret < 0 && !bch2_err_matches(ret, EROFS)) bch_err(c, "error from bch2_move_data() in copygc: %s", bch2_err_str(ret)); trace_and_count(c, copygc, c, atomic64_read(&move_stats.sectors_moved), 0, 0, 0); diff --git a/libbcachefs/nocow_locking.c b/libbcachefs/nocow_locking.c index b325fb10..bff62671 100644 --- a/libbcachefs/nocow_locking.c +++ b/libbcachefs/nocow_locking.c @@ -4,12 +4,116 @@ #include "nocow_locking.h" #include "util.h" +#include <linux/closure.h> + +bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, struct bpos bucket) +{ + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); + unsigned i; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket && atomic_read(&l->l[i])) + return true; + return false; +} + +void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) +{ + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); + int lock_val = flags ? 1 : -1; + unsigned i; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket) { + if (!atomic_sub_return(lock_val, &l->l[i])) + closure_wake_up(&l->wait); + return; + } + + BUG(); +} + +static bool bch2_bucket_nocow_trylock(struct nocow_lock_bucket *l, + u64 dev_bucket, int flags) +{ + int v, lock_val = flags ? 1 : -1; + unsigned i; + + spin_lock(&l->lock); + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (l->b[i] == dev_bucket) + goto got_entry; + + for (i = 0; i < ARRAY_SIZE(l->b); i++) + if (!atomic_read(&l->l[i])) { + l->b[i] = dev_bucket; + goto take_lock; + } +fail: + spin_unlock(&l->lock); + return false; +got_entry: + v = atomic_read(&l->l[i]); + if (lock_val > 0 ? 
v < 0 : v > 0) + goto fail; +take_lock: + atomic_add(lock_val, &l->l[i]); + spin_unlock(&l->lock); + return true; +} + void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, - two_state_lock_t *l, int flags) + struct nocow_lock_bucket *l, + u64 dev_bucket, int flags) +{ + if (!bch2_bucket_nocow_trylock(l, dev_bucket, flags)) { + struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); + u64 start_time = local_clock(); + + __closure_wait_event(&l->wait, bch2_bucket_nocow_trylock(l, dev_bucket, flags)); + bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + } +} + +void bch2_nocow_locks_to_text(struct printbuf *out, struct bucket_nocow_lock_table *t) { - struct bch_fs *c = container_of(t, struct bch_fs, nocow_locks); - u64 start_time = local_clock(); + unsigned i, nr_zero = 0; + struct nocow_lock_bucket *l; + + for (l = t->l; l < t->l + ARRAY_SIZE(t->l); l++) { + unsigned v = 0; + + for (i = 0; i < ARRAY_SIZE(l->l); i++) + v |= atomic_read(&l->l[i]); + + if (!v) { + nr_zero++; + continue; + } + + if (nr_zero) + prt_printf(out, "(%u empty entries)\n", nr_zero); + nr_zero = 0; + + for (i = 0; i < ARRAY_SIZE(l->l); i++) + if (atomic_read(&l->l[i])) + prt_printf(out, "%llu: %i ", l->b[i], atomic_read(&l->l[i])); + prt_newline(out); + } + + if (nr_zero) + prt_printf(out, "(%u empty entries)\n", nr_zero); +} + +int bch2_fs_nocow_locking_init(struct bch_fs *c) +{ + unsigned i; + + for (i = 0; i < ARRAY_SIZE(c->nocow_locks.l); i++) + spin_lock_init(&c->nocow_locks.l[i].lock); - __bch2_two_state_lock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); - bch2_time_stats_update(&c->times[BCH_TIME_nocow_lock_contended], start_time); + return 0; } diff --git a/libbcachefs/nocow_locking.h b/libbcachefs/nocow_locking.h index 2a7a9f44..45258cc3 100644 --- a/libbcachefs/nocow_locking.h +++ b/libbcachefs/nocow_locking.h @@ -2,54 +2,38 @@ #ifndef _BCACHEFS_NOCOW_LOCKING_H #define _BCACHEFS_NOCOW_LOCKING_H -#include "bcachefs_format.h" -#include "two_state_shared_lock.h" +#include "bcachefs.h" +#include "alloc_background.h" +#include "nocow_locking_types.h" #include <linux/hash.h> -#define BUCKET_NOCOW_LOCKS_BITS 10 -#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) - -struct bucket_nocow_lock_table { - two_state_lock_t l[BUCKET_NOCOW_LOCKS]; -}; - -#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) - -static inline two_state_lock_t *bucket_nocow_lock(struct bucket_nocow_lock_table *t, - struct bpos bucket) +static inline struct nocow_lock_bucket *bucket_nocow_lock(struct bucket_nocow_lock_table *t, + u64 dev_bucket) { - u64 dev_bucket = bucket.inode << 56 | bucket.offset; unsigned h = hash_64(dev_bucket, BUCKET_NOCOW_LOCKS_BITS); return t->l + (h & (BUCKET_NOCOW_LOCKS - 1)); } -static inline bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *t, - struct bpos bucket) -{ - two_state_lock_t *l = bucket_nocow_lock(t, bucket); - - return atomic_long_read(&l->v) != 0; -} - -static inline void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *t, - struct bpos bucket, int flags) -{ - two_state_lock_t *l = bucket_nocow_lock(t, bucket); - - bch2_two_state_unlock(l, flags & BUCKET_NOCOW_LOCK_UPDATE); -} +#define BUCKET_NOCOW_LOCK_UPDATE (1 << 0) -void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, two_state_lock_t *, int); +bool bch2_bucket_nocow_is_locked(struct bucket_nocow_lock_table *, struct bpos); +void bch2_bucket_nocow_unlock(struct bucket_nocow_lock_table *, struct bpos, int); +void __bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *, + struct 
nocow_lock_bucket *, u64, int); static inline void bch2_bucket_nocow_lock(struct bucket_nocow_lock_table *t, struct bpos bucket, int flags) { - two_state_lock_t *l = bucket_nocow_lock(t, bucket); + u64 dev_bucket = bucket_to_u64(bucket); + struct nocow_lock_bucket *l = bucket_nocow_lock(t, dev_bucket); - if (!bch2_two_state_trylock(l, flags & BUCKET_NOCOW_LOCK_UPDATE)) - __bch2_bucket_nocow_lock(t, l, flags); + __bch2_bucket_nocow_lock(t, l, dev_bucket, flags); } +void bch2_nocow_locks_to_text(struct printbuf *, struct bucket_nocow_lock_table *); + +int bch2_fs_nocow_locking_init(struct bch_fs *); + #endif /* _BCACHEFS_NOCOW_LOCKING_H */ diff --git a/libbcachefs/nocow_locking_types.h b/libbcachefs/nocow_locking_types.h new file mode 100644 index 00000000..bd12bf67 --- /dev/null +++ b/libbcachefs/nocow_locking_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _BCACHEFS_NOCOW_LOCKING_TYPES_H +#define _BCACHEFS_NOCOW_LOCKING_TYPES_H + +#define BUCKET_NOCOW_LOCKS_BITS 10 +#define BUCKET_NOCOW_LOCKS (1U << BUCKET_NOCOW_LOCKS_BITS) + +struct nocow_lock_bucket { + struct closure_waitlist wait; + spinlock_t lock; + u64 b[4]; + atomic_t l[4]; +} __aligned(SMP_CACHE_BYTES); + +struct bucket_nocow_lock_table { + struct nocow_lock_bucket l[BUCKET_NOCOW_LOCKS]; +}; + +#endif /* _BCACHEFS_NOCOW_LOCKING_TYPES_H */ + diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 4fd8ce55..8df94ad5 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -129,12 +129,12 @@ search: if (!*idx) *idx = __bch2_journal_key_search(keys, btree_id, level, pos); - while (*idx < keys->nr && - (k = idx_to_key(keys, *idx), - k->btree_id == btree_id && - k->level == level && - bpos_le(k->k->k.p, end_pos))) { - if (bpos_ge(k->k->k.p, pos) && !k->overwritten) + while ((k = *idx < keys->nr ? 
idx_to_key(keys, *idx) : NULL)) { + if (__journal_key_cmp(btree_id, level, end_pos, k) < 0) + return NULL; + + if (__journal_key_cmp(btree_id, level, pos, k) <= 0 && + !k->overwritten) return k->k; (*idx)++; @@ -588,7 +588,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_journal_replay(struct bch_fs *c) +static int bch2_journal_replay(struct bch_fs *c, u64 start_seq, u64 end_seq) { struct journal_keys *keys = &c->journal_keys; struct journal_key **keys_sorted, *k; @@ -610,6 +610,13 @@ static int bch2_journal_replay(struct bch_fs *c) sizeof(keys_sorted[0]), journal_sort_seq_cmp, NULL); + if (keys->nr) { + ret = bch2_fs_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)", + keys->nr, start_seq, end_seq); + if (ret) + goto err; + } + for (i = 0; i < keys->nr; i++) { k = keys_sorted[i]; @@ -625,8 +632,8 @@ static int bch2_journal_replay(struct bch_fs *c) : 0), bch2_journal_replay_key(&trans, k)); if (ret) { - bch_err(c, "journal replay: error %d while replaying key at btree %s level %u", - ret, bch2_btree_ids[k->btree_id], k->level); + bch_err(c, "journal replay: error while replaying key at btree %s level %u: %s", + bch2_btree_ids[k->btree_id], k->level, bch2_err_str(ret)); goto err; } } @@ -639,7 +646,7 @@ static int bch2_journal_replay(struct bch_fs *c) ret = bch2_journal_error(j); if (keys->nr && !ret) - bch2_journal_log_msg(&c->journal, "journal replay finished"); + bch2_fs_log_msg(c, "journal replay finished"); err: kvfree(keys_sorted); return ret; @@ -922,6 +929,7 @@ static bool btree_id_is_alloc(enum btree_id id) case BTREE_ID_backpointers: case BTREE_ID_need_discard: case BTREE_ID_freespace: + case BTREE_ID_bucket_gens: return true; default: return false; @@ -1044,7 +1052,7 @@ int bch2_fs_recovery(struct bch_fs *c) const char *err = "cannot allocate memory"; struct bch_sb_field_clean *clean = NULL; struct jset *last_journal_entry = NULL; - u64 blacklist_seq, journal_seq; + u64 last_seq, blacklist_seq, journal_seq; bool write_sb = false; int ret = 0; @@ -1086,14 +1094,11 @@ int bch2_fs_recovery(struct bch_fs *c) } if (!c->opts.nochanges) { - if (c->sb.version < bcachefs_metadata_version_backpointers) { + if (c->sb.version < bcachefs_metadata_version_lru_v2) { bch_info(c, "version prior to backpointers, upgrade and fsck required"); c->opts.version_upgrade = true; c->opts.fsck = true; c->opts.fix_errors = FSCK_OPT_YES; - } else if (c->sb.version < bcachefs_metadata_version_inode_v3) { - bch_info(c, "version prior to inode_v3, upgrade required"); - c->opts.version_upgrade = true; } } @@ -1114,7 +1119,7 @@ int bch2_fs_recovery(struct bch_fs *c) struct journal_replay **i; bch_verbose(c, "starting journal read"); - ret = bch2_journal_read(c, &blacklist_seq, &journal_seq); + ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq); if (ret) goto err; @@ -1142,7 +1147,15 @@ int bch2_fs_recovery(struct bch_fs *c) if (!last_journal_entry) { fsck_err_on(!c->sb.clean, c, "no journal entries found"); - goto use_clean; + if (clean) + goto use_clean; + + genradix_for_each_reverse(&c->journal_entries, iter, i) + if (*i) { + last_journal_entry = &(*i)->j; + (*i)->ignore = false; + break; + } } ret = journal_keys_sort(c); @@ -1188,7 +1201,9 @@ use_clean: journal_seq += 8; if (blacklist_seq != journal_seq) { - ret = bch2_journal_seq_blacklist_add(c, + ret = bch2_fs_log_msg(c, "blacklisting entries %llu-%llu", + blacklist_seq, journal_seq) ?: + bch2_journal_seq_blacklist_add(c, blacklist_seq, 
				journal_seq);
 		if (ret) {
 			bch_err(c, "error creating new journal seq blacklist entry");
@@ -1196,7 +1211,9 @@ use_clean:
 		}
 	}
-	ret = bch2_fs_journal_start(&c->journal, journal_seq);
+	ret = bch2_fs_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
+			      journal_seq, last_seq, blacklist_seq - 1) ?:
+		bch2_fs_journal_start(&c->journal, journal_seq);
 	if (ret)
 		goto err;
@@ -1245,13 +1262,6 @@ use_clean:
 	set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
-	bch_info(c, "checking need_discard and freespace btrees");
-	err = "error checking need_discard and freespace btrees";
-	ret = bch2_check_alloc_info(c);
-	if (ret)
-		goto err;
-	bch_verbose(c, "done checking need_discard and freespace btrees");
-
 	if (c->sb.version < bcachefs_metadata_version_snapshot_2) {
 		err = "error creating root snapshot node";
 		ret = bch2_fs_initialize_subvolumes(c);
@@ -1270,12 +1280,21 @@ use_clean:
 	bch_info(c, "starting journal replay, %zu keys", c->journal_keys.nr);
 	err = "journal replay failed";
-	ret = bch2_journal_replay(c);
+	ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
 	if (ret)
 		goto err;
 	if (c->opts.verbose || !c->sb.clean)
 		bch_info(c, "journal replay done");
+	bch_info(c, "checking need_discard and freespace btrees");
+	err = "error checking need_discard and freespace btrees";
+	ret = bch2_check_alloc_info(c);
+	if (ret)
+		goto err;
+	bch_verbose(c, "done checking need_discard and freespace btrees");
+
+	set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
+
 	bch_info(c, "checking lrus");
 	err = "error checking lrus";
 	ret = bch2_check_lrus(c);
@@ -1315,6 +1334,7 @@ use_clean:
 		set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
 	} else {
 		set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags);
+		set_bit(BCH_FS_CHECK_ALLOC_DONE, &c->flags);
 		set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags);
 		set_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags);
 		set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags);
@@ -1341,7 +1361,7 @@ use_clean:
 	bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr);
 	err = "journal replay failed";
-	ret = bch2_journal_replay(c);
+	ret = bch2_journal_replay(c, last_seq, blacklist_seq - 1);
 	if (ret)
 		goto err;
 	if (c->opts.verbose || !c->sb.clean)
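Recovery now gets three sequence numbers back from bch2_journal_read() and logs what it does with them: entries from last_seq up to blacklist_seq - 1 are replayed, anything from blacklist_seq to journal_seq is blacklisted, and the journal restarts at journal_seq. A small illustrative helper, with semantics inferred only from the log messages in the diff above:

#include <inttypes.h>
#include <stdio.h>

/*
 * Illustration only: how the three sequence numbers returned by journal
 * read relate, going by the messages logged during recovery.
 */
static void print_recovery_plan(uint64_t last_seq, uint64_t blacklist_seq,
				uint64_t journal_seq)
{
	printf("replaying entries %" PRIu64 "-%" PRIu64 "\n",
	       last_seq, blacklist_seq - 1);

	if (blacklist_seq != journal_seq)
		printf("blacklisting entries %" PRIu64 "-%" PRIu64 "\n",
		       blacklist_seq, journal_seq);

	printf("starting journal at entry %" PRIu64 "\n", journal_seq);
}

int main(void)
{
	print_recovery_plan(100, 120, 128);
	return 0;
}

Because these messages are emitted with bch2_fs_log_msg() before the journal has started, they ride along in early_journal_entries and land in the first journal entry written after recovery.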
diff --git a/libbcachefs/reflink.c b/libbcachefs/reflink.c
index ec672fed..e89a9a1a 100644
--- a/libbcachefs/reflink.c
+++ b/libbcachefs/reflink.c
@@ -283,7 +283,7 @@ s64 bch2_remap_range(struct bch_fs *c,
 	int ret = 0, ret2 = 0;
 	if (!percpu_ref_tryget_live(&c->writes))
-		return -EROFS;
+		return -BCH_ERR_erofs_no_writes;
 	bch2_check_set_feature(c, BCH_FEATURE_reflink);
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 0aa243f5..738b68b5 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -804,6 +804,11 @@ int bch2_write_super(struct bch_fs *c)
 	closure_init_stack(cl);
 	memset(&sb_written, 0, sizeof(sb_written));
+	if (c->opts.version_upgrade) {
+		c->disk_sb.sb->magic = BCHFS_MAGIC;
+		c->disk_sb.sb->layout.magic = BCHFS_MAGIC;
+	}
+
 	le64_add_cpu(&c->disk_sb.sb->seq, 1);
 	if (test_bit(BCH_FS_ERROR, &c->flags))
@@ -858,7 +863,7 @@ int bch2_write_super(struct bch_fs *c)
 				le64_to_cpu(ca->sb_read_scratch->seq),
 				ca->disk_sb.seq);
 			percpu_ref_put(&ca->io_ref);
-			ret = -EROFS;
+			ret = -BCH_ERR_erofs_sb_err;
 			goto out;
 		}
@@ -868,7 +873,7 @@ int bch2_write_super(struct bch_fs *c)
 				le64_to_cpu(ca->sb_read_scratch->seq),
 				ca->disk_sb.seq);
 			percpu_ref_put(&ca->io_ref);
-			ret = -EROFS;
+			ret = -BCH_ERR_erofs_sb_err;
 			goto out;
 		}
 	}
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 7cac0567..95c16f70 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -37,6 +37,7 @@
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
+#include "nocow_locking.h"
 #include "quota.h"
 #include "rebalance.h"
 #include "recovery.h"
@@ -803,6 +804,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 	    bch2_fs_buckets_waiting_for_journal_init(c) ?:
 	    bch2_fs_subvolumes_init(c) ?:
 	    bch2_fs_io_init(c) ?:
+	    bch2_fs_nocow_locking_init(c) ?:
 	    bch2_fs_encryption_init(c) ?:
 	    bch2_fs_compress_init(c) ?:
 	    bch2_fs_ec_init(c) ?:
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 647d018b..6e49cf98 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -27,6 +27,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
+#include "nocow_locking.h"
 #include "opts.h"
 #include "rebalance.h"
 #include "replicas.h"
@@ -194,6 +195,7 @@ read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(stripes_heap);
 read_attribute(open_buckets);
+read_attribute(nocow_lock_table);
 
 read_attribute(internal_uuid);
@@ -445,6 +447,9 @@ SHOW(bch2_fs)
 	if (attr == &sysfs_data_jobs)
 		data_progress_to_text(out, c);
 
+	if (attr == &sysfs_nocow_lock_table)
+		bch2_nocow_locks_to_text(out, &c->nocow_locks);
+
 	return 0;
 }
@@ -627,6 +632,7 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_new_stripes,
 	&sysfs_stripes_heap,
 	&sysfs_open_buckets,
+	&sysfs_nocow_lock_table,
 
 	&sysfs_io_timers_read,
 	&sysfs_io_timers_write,
diff --git a/linux/shrinker.c b/linux/shrinker.c
index 23e288d8..0b5715b3 100644
--- a/linux/shrinker.c
+++ b/linux/shrinker.c
@@ -1,6 +1,7 @@
 #include <stdio.h>
 
+#include <linux/kthread.h>
 #include <linux/list.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
@@ -126,3 +127,31 @@ void run_shrinkers(gfp_t gfp_mask, bool allocation_failed)
 	}
 	mutex_unlock(&shrinker_lock);
 }
+
+static int shrinker_thread(void *arg)
+{
+	while (!kthread_should_stop()) {
+		sleep(1);
+		run_shrinkers(GFP_KERNEL, false);
+	}
+
+	return 0;
+}
+
+struct task_struct *shrinker_task;
+
+__attribute__((constructor(103)))
+static void shrinker_thread_init(void)
+{
+	shrinker_task = kthread_run(shrinker_thread, NULL, "shrinkers");
+	BUG_ON(IS_ERR(shrinker_task));
+}
+
+__attribute__((destructor(103)))
+static void shrinker_thread_exit(void)
+{
+	int ret = kthread_stop(shrinker_task);
+	BUG_ON(ret);
+
+	shrinker_task = NULL;
+}
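The userspace shim above now runs shrinkers from a background thread once a second instead of only reacting to allocation failure, using constructor/destructor hooks to start and stop it around main(). A rough portable sketch of the same periodic-worker pattern using plain pthreads; the names here are hypothetical and the shim itself uses its kthread emulation rather than this code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_bool stop_requested;
static pthread_t worker;

static void *worker_fn(void *arg)
{
	(void) arg;
	while (!atomic_load(&stop_requested)) {
		sleep(1);
		/* stand-in for run_shrinkers(GFP_KERNEL, false) */
		puts("reclaiming caches");
	}
	return NULL;
}

/* Runs before main(), like the shim's constructor(103) hook */
__attribute__((constructor))
static void worker_init(void)
{
	pthread_create(&worker, NULL, worker_fn, NULL);
}

/* Runs after main() returns, like the destructor(103) hook */
__attribute__((destructor))
static void worker_exit(void)
{
	atomic_store(&stop_requested, true);
	pthread_join(worker, NULL);
}

int main(void)
{
	sleep(3);
	return 0;
}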