diff options
-rw-r--r-- | fs/bcachefs/bcachefs.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 43 | ||||
-rw-r--r-- | fs/bcachefs/fs.c | 20 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 49 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.h | 7 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 15 | ||||
-rw-r--r-- | fs/bcachefs/util.c | 5 |
7 files changed, 97 insertions, 44 deletions
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 41c6d8865a74..fb3156ed7f0b 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -1288,7 +1288,7 @@ static inline int bch2_fs_casefold_enabled(struct bch_fs *c) { if (!IS_ENABLED(CONFIG_UNICODE)) return bch_err_throw(c, no_casefolding_without_utf8); - if (!c->opts.casefold_disabled) + if (c->opts.casefold_disabled) return bch_err_throw(c, casefolding_disabled); return 0; } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 19fd951495ac..84e302afc8fc 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1337,15 +1337,42 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, btree_node_reset_sib_u64s(b); - scoped_guard(rcu) - bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { - struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); - - if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { - set_btree_node_need_rewrite(b); - set_btree_node_need_rewrite_degraded(b); + /* + * XXX: + * + * We deadlock if too many btree updates require node rewrites while + * we're still in journal replay. + * + * This is because btree node rewrites generate more updates for the + * interior updates (alloc, backpointers), and if those updates touch + * new nodes and generate more rewrites - well, you see the problem. + * + * The biggest cause is that we don't use the btree write buffer (for + * the backpointer updates - this needs some real thought on locking in + * order to fix. + * + * The problem with this workaround (not doing the rewrite for degraded + * nodes in journal replay) is that those degraded nodes persist, and we + * don't want that (this is a real bug when a btree node write completes + * with fewer replicas than we wanted and leaves a degraded node due to + * device _removal_, i.e. the device went away mid write). 
+ * + * It's less of a bug here, but still a problem because we don't yet + * have a way of tracking degraded data - we need another index (all + * extents/btree nodes, by replicas entry) in order to fix properly + * (re-replicate degraded data at the earliest possible time). + */ + if (c->recovery.passes_complete & BIT_ULL(BCH_RECOVERY_PASS_journal_replay)) { + scoped_guard(rcu) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) { + struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev); + + if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) { + set_btree_node_need_rewrite(b); + set_btree_node_need_rewrite_degraded(b); + } } - } + } if (!ptr_written) { set_btree_node_need_rewrite(b); diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index f9bc99eb2d02..3b0783f117ae 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1692,11 +1692,15 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, s.mask = map_defined(bch_flags_to_xflags); s.flags |= map_flags_rev(bch_flags_to_xflags, fa->fsx_xflags); - if (fa->fsx_xflags) - return bch_err_throw(c, unsupported_fsx_flag); + if (fa->fsx_xflags) { + ret = bch_err_throw(c, unsupported_fsx_flag); + goto err; + } - if (fa->fsx_projid >= U32_MAX) - return bch_err_throw(c, projid_too_big); + if (fa->fsx_projid >= U32_MAX) { + ret = bch_err_throw(c, projid_too_big); + goto err; + } /* * inode fields accessible via the xattr interface are stored with a +1 @@ -1718,8 +1722,10 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, fa->flags &= ~FS_CASEFOLD_FL; s.flags |= map_flags_rev(bch_flags_to_uflags, fa->flags); - if (fa->flags) - return bch_err_throw(c, unsupported_fa_flag); + if (fa->flags) { + ret = bch_err_throw(c, unsupported_fa_flag); + goto err; + } } mutex_lock(&inode->ei_update_lock); @@ -1730,7 +1736,7 @@ static int bch2_fileattr_set(struct mnt_idmap *idmap, bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); - +err: return bch2_err_class(ret); } diff --git 
a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index dd3f3434c1b0..f3cf48193398 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1272,6 +1272,34 @@ static void bch2_journal_print_checksum_error(struct bch_fs *c, struct journal_r printbuf_exit(&buf); } +struct u64_range bch2_journal_entry_missing_range(struct bch_fs *c, u64 start, u64 end) +{ + BUG_ON(start > end); + + if (start == end) + return (struct u64_range) {}; + + while (start < end && + bch2_journal_seq_is_blacklisted(c, start, false)) + start++; + + if (start == end) + return (struct u64_range) {}; + + struct u64_range missing = { .start = start }; + + while (start < end && + !bch2_journal_seq_is_blacklisted(c, start, false)) + start++; + + missing.end = start - 1; + + if (missing.start == missing.end) + return (struct u64_range) {}; + + return missing; +} + noinline_for_stack static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 end_seq) { @@ -1290,25 +1318,12 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e BUG_ON(seq > le64_to_cpu(i->j.seq)); - while (seq < le64_to_cpu(i->j.seq)) { - while (seq < le64_to_cpu(i->j.seq) && - bch2_journal_seq_is_blacklisted(c, seq, false)) - seq++; - - if (seq == le64_to_cpu(i->j.seq)) - break; - - u64 missing_start = seq; - - while (seq < le64_to_cpu(i->j.seq) && - !bch2_journal_seq_is_blacklisted(c, seq, false)) - seq++; - - u64 missing_end = seq - 1; + struct u64_range missing; + while ((missing = bch2_journal_entry_missing_range(c, seq, le64_to_cpu(i->j.seq))).start) { printbuf_reset(&buf); prt_printf(&buf, "journal entries %llu-%llu missing! 
(replaying %llu-%llu)", - missing_start, missing_end, + missing.start, missing.end, start_seq, end_seq); prt_printf(&buf, "\nprev at "); @@ -1323,6 +1338,8 @@ static int bch2_journal_check_for_missing(struct bch_fs *c, u64 start_seq, u64 e prt_printf(&buf, ", continue?"); fsck_err(c, journal_entries_missing, "%s", buf.buf); + + seq = missing.end + 1; } prev = i; diff --git a/fs/bcachefs/journal_io.h b/fs/bcachefs/journal_io.h index 6fa82c4050fe..f53c5c81d137 100644 --- a/fs/bcachefs/journal_io.h +++ b/fs/bcachefs/journal_io.h @@ -71,6 +71,13 @@ void bch2_journal_entry_to_text(struct printbuf *, struct bch_fs *, void bch2_journal_ptrs_to_text(struct printbuf *, struct bch_fs *, struct journal_replay *); +struct u64_range { + u64 start; + u64 end; +}; + +struct u64_range bch2_journal_entry_missing_range(struct bch_fs *, u64, u64); + int bch2_journal_read(struct bch_fs *, u64 *, u64 *, u64 *); CLOSURE_CALLBACK(bch2_journal_write); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 7340c1118579..6980cd5b0ca8 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1024,13 +1024,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } -#if !IS_ENABLED(CONFIG_UNICODE) - if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { - printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); - ret = -EINVAL; - goto err; - } -#endif +#if IS_ENABLED(CONFIG_UNICODE) if (!bch2_fs_casefold_enabled(c)) { /* Default encoding until we can potentially have more as an option. 
*/ c->cf_encoding = utf8_load(BCH_FS_DEFAULT_UTF8_ENCODING); @@ -1043,6 +1037,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts *opts, goto err; } } +#else + if (c->sb.features & BIT_ULL(BCH_FEATURE_casefolding)) { + printk(KERN_ERR "Cannot mount a filesystem with casefolding on a kernel without CONFIG_UNICODE\n"); + ret = -EINVAL; + goto err; + } +#endif for (i = 0; i < c->sb.nr_devices; i++) { if (!bch2_member_exists(c->disk_sb.sb, i)) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 05b40debf211..7a4436fd4441 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -299,17 +299,12 @@ int bch2_save_backtrace(bch_stacktrace *stack, struct task_struct *task, unsigne if (ret) return ret; - if (!down_read_trylock(&task->signal->exec_update_lock)) - return -1; - do { nr_entries = stack_trace_save_tsk(task, stack->data, stack->size, skipnr + 1); } while (nr_entries == stack->size && !(ret = darray_make_room_gfp(stack, stack->size * 2, gfp))); stack->nr = nr_entries; - up_read(&task->signal->exec_update_lock); - return ret; #else return 0; |