summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2022-08-18 12:32:10 -0400
committerKent Overstreet <kent.overstreet@gmail.com>2022-08-18 12:49:48 -0400
commitdded444b20dd3f47393937315b8217535ff3c51d (patch)
treec6f6b3973b5de757b458bb9e0f4c7d81e4fbef4d
parent51ffcc699369deaa0fb4333a68bbbdf523afba11 (diff)
Update bcachefs sources to dfaf9a6ee2 lib/printbuf: Clean up headers
-rw-r--r--.bcachefs_revision2
-rw-r--r--Makefile6
-rw-r--r--cmd_fs.c23
-rw-r--r--include/linux/printbuf.h106
-rw-r--r--include/linux/string_helpers.h20
-rw-r--r--include/trace/events/bcachefs.h23
-rw-r--r--libbcachefs/alloc_foreground.c190
-rw-r--r--libbcachefs/alloc_foreground.h8
-rw-r--r--libbcachefs/backpointers.c41
-rw-r--r--libbcachefs/bcachefs.h15
-rw-r--r--libbcachefs/bkey.c80
-rw-r--r--libbcachefs/bkey.h11
-rw-r--r--libbcachefs/btree_io.c23
-rw-r--r--libbcachefs/btree_iter.c165
-rw-r--r--libbcachefs/btree_iter.h17
-rw-r--r--libbcachefs/btree_key_cache.c15
-rw-r--r--libbcachefs/btree_locking.h30
-rw-r--r--libbcachefs/btree_types.h2
-rw-r--r--libbcachefs/btree_update_interior.c83
-rw-r--r--libbcachefs/debug.c164
-rw-r--r--libbcachefs/errcode.h1
-rw-r--r--libbcachefs/fsck.c14
-rw-r--r--libbcachefs/journal.c3
-rw-r--r--libbcachefs/move.c2
-rw-r--r--libbcachefs/rebalance.c3
-rw-r--r--libbcachefs/subvolume.c4
-rw-r--r--libbcachefs/super-io.c8
-rw-r--r--libbcachefs/sysfs.c3
-rw-r--r--libbcachefs/util.c9
-rw-r--r--libbcachefs/util.h2
-rw-r--r--linux/printbuf.c305
-rw-r--r--linux/string_helpers.c131
32 files changed, 998 insertions, 511 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index 720981ca..9f7af72c 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-49c34dadcad9c33b1e8510b5543d60c40fa0bebd
+dfaf9a6ee24f5c415635f9a75f5281f385535ebd
diff --git a/Makefile b/Makefile
index bed43bda..a5a74fed 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ PREFIX?=/usr/local
PKG_CONFIG?=pkg-config
INSTALL=install
-CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \
+CFLAGS+=-std=gnu89 -O2 -g -MMD -Wall -fPIC \
-Wno-pointer-sign \
-fno-strict-aliasing \
-fno-delete-null-pointer-checks \
@@ -195,6 +195,10 @@ update-bcachefs-sources:
git add linux/generic-radix-tree.c
cp $(LINUX_DIR)/include/linux/kmemleak.h include/linux/
git add include/linux/kmemleak.h
+ cp $(LINUX_DIR)/include/linux/printbuf.h include/linux/
+ git add include/linux/printbuf.h
+ cp $(LINUX_DIR)/lib/printbuf.c linux/
+ git add linux/printbuf.c
cp $(LINUX_DIR)/scripts/Makefile.compiler ./
git add Makefile.compiler
$(RM) libbcachefs/*.mod.c
diff --git a/cmd_fs.c b/cmd_fs.c
index 195ad302..007c8d87 100644
--- a/cmd_fs.c
+++ b/cmd_fs.c
@@ -179,8 +179,9 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
pr_uuid(out, fs.uuid.b);
prt_newline(out);
- out->tabstops[0] = 20;
- out->tabstops[1] = 36;
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
prt_str(out, "Size:");
prt_tab(out);
@@ -202,10 +203,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
prt_newline(out);
- out->tabstops[0] = 16;
- out->tabstops[1] = 32;
- out->tabstops[2] = 50;
- out->tabstops[3] = 68;
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 18);
+ printbuf_tabstop_push(out, 18);
prt_str(out, "Data type");
prt_tab(out);
@@ -255,10 +257,11 @@ static void fs_usage_to_text(struct printbuf *out, const char *path)
sort(dev_names.data, dev_names.nr,
sizeof(dev_names.data[0]), dev_by_label_cmp, NULL);
- out->tabstops[0] = 16;
- out->tabstops[1] = 36;
- out->tabstops[2] = 52;
- out->tabstops[3] = 68;
+ printbuf_tabstops_reset(out);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 20);
+ printbuf_tabstop_push(out, 16);
+ printbuf_tabstop_push(out, 14);
darray_for_each(dev_names, dev)
dev_usage_to_text(out, fs, dev);
diff --git a/include/linux/printbuf.h b/include/linux/printbuf.h
index fa8e73d5..24e62e56 100644
--- a/include/linux/printbuf.h
+++ b/include/linux/printbuf.h
@@ -32,6 +32,10 @@
* Since no equivalent yet exists for GFP_ATOMIC/GFP_NOWAIT, memory allocations
* will be done with GFP_NOWAIT if printbuf->atomic is nonzero.
*
+ * It's allowed to grab the output buffer and free it later with kfree() instead
+ * of using printbuf_exit(), if the user just needs a heap allocated string at
+ * the end.
+ *
* Memory allocation failures: We don't return errors directly, because on
* memory allocation failure we usually don't want to bail out and unwind - we
* want to print what we've got, on a best-effort basis. But code that does want
@@ -67,6 +71,8 @@ enum printbuf_si {
PRINTBUF_UNITS_10, /* use powers of 10^3 (standard SI) */
};
+#define PRINTBUF_INLINE_TABSTOPS 4
+
struct printbuf {
char *buf;
unsigned size;
@@ -82,19 +88,34 @@ struct printbuf {
bool heap_allocated:1;
enum printbuf_si si_units:1;
bool human_readable_units:1;
- u8 tabstop;
- u8 tabstops[4];
+ bool has_indent_or_tabstops:1;
+ bool suppress_indent_tabstop_handling:1;
+ u8 nr_tabstops;
+
+ /*
+ * Do not modify directly: use printbuf_tabstop_add(),
+ * printbuf_tabstop_get()
+ */
+ u8 cur_tabstop;
+ u8 _tabstops[PRINTBUF_INLINE_TABSTOPS];
};
int printbuf_make_room(struct printbuf *, unsigned);
const char *printbuf_str(const struct printbuf *);
void printbuf_exit(struct printbuf *);
-void prt_newline(struct printbuf *);
+void printbuf_tabstops_reset(struct printbuf *);
+void printbuf_tabstop_pop(struct printbuf *);
+int printbuf_tabstop_push(struct printbuf *, unsigned);
+
void printbuf_indent_add(struct printbuf *, unsigned);
void printbuf_indent_sub(struct printbuf *, unsigned);
+
+void prt_newline(struct printbuf *);
void prt_tab(struct printbuf *);
void prt_tab_rjust(struct printbuf *);
+
+void prt_bytes_indented(struct printbuf *, const char *, unsigned);
void prt_human_readable_u64(struct printbuf *, u64);
void prt_human_readable_s64(struct printbuf *, s64);
void prt_units_u64(struct printbuf *, u64);
@@ -129,7 +150,7 @@ static inline unsigned printbuf_remaining(struct printbuf *out)
static inline unsigned printbuf_written(struct printbuf *out)
{
- return min(out->pos, out->size);
+ return out->size ? min(out->pos, out->size - 1) : 0;
}
/*
@@ -150,21 +171,6 @@ static inline void printbuf_nul_terminate(struct printbuf *out)
out->buf[out->size - 1] = 0;
}
-static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
-{
- memset(out->buf + out->pos,
- c,
- min(n, printbuf_remaining(out)));
- out->pos += n;
-}
-
-static inline void prt_chars(struct printbuf *out, char c, unsigned n)
-{
- printbuf_make_room(out, n);
- __prt_chars_reserved(out, c, n);
- printbuf_nul_terminate(out);
-}
-
/* Doesn't call printbuf_make_room(), doesn't nul terminate: */
static inline void __prt_char_reserved(struct printbuf *out, char c)
{
@@ -186,14 +192,34 @@ static inline void prt_char(struct printbuf *out, char c)
printbuf_nul_terminate(out);
}
+static inline void __prt_chars_reserved(struct printbuf *out, char c, unsigned n)
+{
+ unsigned i, can_print = min(n, printbuf_remaining(out));
+
+ for (i = 0; i < can_print; i++)
+ out->buf[out->pos++] = c;
+ out->pos += n - can_print;
+}
+
+static inline void prt_chars(struct printbuf *out, char c, unsigned n)
+{
+ printbuf_make_room(out, n);
+ __prt_chars_reserved(out, c, n);
+ printbuf_nul_terminate(out);
+}
+
static inline void prt_bytes(struct printbuf *out, const void *b, unsigned n)
{
+ unsigned i, can_print;
+
printbuf_make_room(out, n);
- memcpy(out->buf + out->pos,
- b,
- min(n, printbuf_remaining(out)));
- out->pos += n;
+ can_print = min(n, printbuf_remaining(out));
+
+ for (i = 0; i < can_print; i++)
+ out->buf[out->pos++] = ((char *) b)[i];
+ out->pos += n - can_print;
+
printbuf_nul_terminate(out);
}
@@ -202,6 +228,11 @@ static inline void prt_str(struct printbuf *out, const char *str)
prt_bytes(out, str, strlen(str));
}
+static inline void prt_str_indented(struct printbuf *out, const char *str)
+{
+ prt_bytes_indented(out, str, strlen(str));
+}
+
static inline void prt_hex_byte(struct printbuf *out, u8 byte)
{
printbuf_make_room(out, 2);
@@ -226,7 +257,8 @@ static inline void printbuf_reset(struct printbuf *buf)
buf->pos = 0;
buf->allocation_failure = 0;
buf->indent = 0;
- buf->tabstop = 0;
+ buf->nr_tabstops = 0;
+ buf->cur_tabstop = 0;
}
/**
@@ -245,4 +277,30 @@ static inline void printbuf_atomic_dec(struct printbuf *buf)
buf->atomic--;
}
+/*
+ * This is used for the %pf(%p) sprintf format extension, where we pass a pretty
+ * printer and arguments to the pretty-printer to sprintf
+ *
+ * Instead of passing a pretty-printer function to sprintf directly, we pass it
+ * a pointer to a struct call_pp, so that sprintf can check that the magic
+ * number is present, which in turn ensures that the CALL_PP() macro has been
+ * used in order to typecheck the arguments to the pretty printer function
+ *
+ * Example usage:
+ * sprintf("%pf(%p)", CALL_PP(prt_bdev, bdev));
+ */
+struct call_pp {
+ unsigned long magic;
+ void *fn;
+};
+
+#define PP_TYPECHECK(fn, ...) \
+ ({ while (0) fn((struct printbuf *) NULL, ##__VA_ARGS__); })
+
+#define CALL_PP_MAGIC (unsigned long) 0xce0b92d22f6b6be4
+
+#define CALL_PP(fn, ...) \
+ (PP_TYPECHECK(fn, ##__VA_ARGS__), \
+ &((struct call_pp) { CALL_PP_MAGIC, fn })), ##__VA_ARGS__
+
#endif /* _LINUX_PRINTBUF_H */
diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
new file mode 100644
index 00000000..af587706
--- /dev/null
+++ b/include/linux/string_helpers.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STRING_HELPERS_H_
+#define _LINUX_STRING_HELPERS_H_
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+
+/* Descriptions of the types of units to
+ * print in */
+enum string_size_units {
+ STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
+ STRING_UNITS_2, /* use binary powers of 2^10 */
+};
+
+int string_get_size(u64 size, u64 blk_size, enum string_size_units units,
+ char *buf, int len);
+
+#endif
diff --git a/include/trace/events/bcachefs.h b/include/trace/events/bcachefs.h
index 2c980742..a18c59a3 100644
--- a/include/trace/events/bcachefs.h
+++ b/include/trace/events/bcachefs.h
@@ -317,24 +317,27 @@ DEFINE_EVENT(bch_fs, btree_node_cannibalize_unlock,
);
TRACE_EVENT(btree_reserve_get_fail,
- TP_PROTO(struct bch_fs *c, size_t required, struct closure *cl),
- TP_ARGS(c, required, cl),
+ TP_PROTO(const char *trans_fn,
+ unsigned long caller_ip,
+ size_t required),
+ TP_ARGS(trans_fn, caller_ip, required),
TP_STRUCT__entry(
- __field(dev_t, dev )
+ __array(char, trans_fn, 24 )
+ __field(unsigned long, caller_ip )
__field(size_t, required )
- __field(struct closure *, cl )
),
TP_fast_assign(
- __entry->dev = c->dev;
- __entry->required = required;
- __entry->cl = cl;
+ strlcpy(__entry->trans_fn, trans_fn, sizeof(__entry->trans_fn));
+ __entry->caller_ip = caller_ip;
+ __entry->required = required;
),
- TP_printk("%d,%d required %zu by %p",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->required, __entry->cl)
+ TP_printk("%s %pS required %zu",
+ __entry->trans_fn,
+ (void *) __entry->caller_ip,
+ __entry->required)
);
DEFINE_EVENT(btree_node, btree_split,
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index 0a9f1313..c57baa1f 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -339,6 +339,8 @@ static struct open_bucket *try_alloc_bucket(struct btree_trans *trans, struct bc
skipped_need_journal_commit,
skipped_nouse,
cl);
+ if (!ob)
+ iter.path->preserve = false;
err:
set_btree_iter_dontneed(&iter);
bch2_trans_iter_exit(trans, &iter);
@@ -379,15 +381,15 @@ static struct open_bucket *try_alloc_partial_bucket(struct bch_fs *c, struct bch
* journal buckets - journal buckets will be < ca->new_fs_bucket_idx
*/
static noinline struct open_bucket *
-bch2_bucket_alloc_trans_early(struct btree_trans *trans,
- struct bch_dev *ca,
- enum alloc_reserve reserve,
- u64 *cur_bucket,
- u64 *buckets_seen,
- u64 *skipped_open,
- u64 *skipped_need_journal_commit,
- u64 *skipped_nouse,
- struct closure *cl)
+bch2_bucket_alloc_early(struct btree_trans *trans,
+ struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ u64 *cur_bucket,
+ u64 *buckets_seen,
+ u64 *skipped_open,
+ u64 *skipped_need_journal_commit,
+ u64 *skipped_nouse,
+ struct closure *cl)
{
struct btree_iter iter;
struct bkey_s_c k;
@@ -430,7 +432,7 @@ bch2_bucket_alloc_trans_early(struct btree_trans *trans,
return ob ?: ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
}
-static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+static struct open_bucket *bch2_bucket_alloc_freelist(struct btree_trans *trans,
struct bch_dev *ca,
enum alloc_reserve reserve,
u64 *cur_bucket,
@@ -445,15 +447,6 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
struct open_bucket *ob = NULL;
int ret;
- if (unlikely(!ca->mi.freespace_initialized))
- return bch2_bucket_alloc_trans_early(trans, ca, reserve,
- cur_bucket,
- buckets_seen,
- skipped_open,
- skipped_need_journal_commit,
- skipped_nouse,
- cl);
-
BUG_ON(ca->new_fs_bucket_idx);
/*
@@ -467,7 +460,7 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
break;
for (*cur_bucket = max(*cur_bucket, bkey_start_offset(k.k));
- *cur_bucket < k.k->p.offset && !ob;
+ *cur_bucket < k.k->p.offset;
(*cur_bucket)++) {
ret = btree_trans_too_many_iters(trans);
if (ret)
@@ -481,6 +474,8 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
skipped_need_journal_commit,
skipped_nouse,
k, cl);
+ if (ob)
+ break;
}
if (ob || ret)
@@ -496,11 +491,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
*
* Returns index of bucket on success, 0 on failure
* */
-struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
+ struct bch_dev *ca,
enum alloc_reserve reserve,
bool may_alloc_partial,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
struct open_bucket *ob = NULL;
struct bch_dev_usage usage;
bool freespace_initialized = READ_ONCE(ca->mi.freespace_initialized);
@@ -512,7 +509,6 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
u64 skipped_need_journal_commit = 0;
u64 skipped_nouse = 0;
bool waiting = false;
- int ret;
again:
usage = bch2_dev_usage_read(ca);
avail = dev_buckets_free(ca, usage, reserve);
@@ -549,19 +545,26 @@ again:
return ob;
}
- ret = bch2_trans_do(c, NULL, NULL, 0,
- PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
- &cur_bucket,
- &buckets_seen,
- &skipped_open,
- &skipped_need_journal_commit,
- &skipped_nouse,
- cl)));
+ ob = likely(ca->mi.freespace_initialized)
+ ? bch2_bucket_alloc_freelist(trans, ca, reserve,
+ &cur_bucket,
+ &buckets_seen,
+ &skipped_open,
+ &skipped_need_journal_commit,
+ &skipped_nouse,
+ cl)
+ : bch2_bucket_alloc_early(trans, ca, reserve,
+ &cur_bucket,
+ &buckets_seen,
+ &skipped_open,
+ &skipped_need_journal_commit,
+ &skipped_nouse,
+ cl);
if (skipped_need_journal_commit * 2 > avail)
bch2_journal_flush_async(&c->journal, NULL);
- if (!ob && !ret && !freespace_initialized && start) {
+ if (!ob && !freespace_initialized && start) {
start = cur_bucket = 0;
goto again;
}
@@ -570,7 +573,7 @@ again:
ca->bucket_alloc_trans_early_cursor = cur_bucket;
err:
if (!ob)
- ob = ERR_PTR(ret ?: -BCH_ERR_no_buckets_found);
+ ob = ERR_PTR(-BCH_ERR_no_buckets_found);
if (IS_ERR(ob)) {
trace_bucket_alloc_fail(ca, bch2_alloc_reserves[reserve],
@@ -590,6 +593,19 @@ err:
return ob;
}
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+ enum alloc_reserve reserve,
+ bool may_alloc_partial,
+ struct closure *cl)
+{
+ struct open_bucket *ob;
+
+ bch2_trans_do(c, NULL, NULL, 0,
+ PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(&trans, ca, reserve,
+ may_alloc_partial, cl)));
+ return ob;
+}
+
static int __dev_stripe_cmp(struct dev_stripe_state *stripe,
unsigned l, unsigned r)
{
@@ -655,7 +671,7 @@ static void add_new_bucket(struct bch_fs *c,
ob_push(c, ptrs, ob);
}
-int bch2_bucket_alloc_set(struct bch_fs *c,
+static int bch2_bucket_alloc_set_trans(struct btree_trans *trans,
struct open_buckets *ptrs,
struct dev_stripe_state *stripe,
struct bch_devs_mask *devs_may_alloc,
@@ -666,11 +682,12 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
unsigned flags,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
struct dev_alloc_list devs_sorted =
bch2_dev_alloc_list(c, stripe, devs_may_alloc);
unsigned dev;
struct bch_dev *ca;
- int ret = -BCH_ERR_insufficient_devices;
+ int ret = 0;
unsigned i;
BUG_ON(*nr_effective >= nr_replicas);
@@ -694,16 +711,15 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
continue;
}
- ob = bch2_bucket_alloc(c, ca, reserve,
+ ob = bch2_bucket_alloc_trans(trans, ca, reserve,
flags & BUCKET_MAY_ALLOC_PARTIAL, cl);
if (!IS_ERR(ob))
bch2_dev_stripe_increment(ca, stripe);
percpu_ref_put(&ca->ref);
- if (IS_ERR(ob)) {
- ret = PTR_ERR(ob);
-
- if (cl)
+ ret = PTR_ERR_OR_ZERO(ob);
+ if (ret) {
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || cl)
break;
continue;
}
@@ -711,15 +727,36 @@ int bch2_bucket_alloc_set(struct bch_fs *c,
add_new_bucket(c, ptrs, devs_may_alloc,
nr_effective, have_cache, flags, ob);
- if (*nr_effective >= nr_replicas) {
- ret = 0;
+ if (*nr_effective >= nr_replicas)
break;
- }
}
+ if (*nr_effective >= nr_replicas)
+ ret = 0;
+ else if (!ret)
+ ret = -BCH_ERR_insufficient_devices;
+
return ret;
}
+int bch2_bucket_alloc_set(struct bch_fs *c,
+ struct open_buckets *ptrs,
+ struct dev_stripe_state *stripe,
+ struct bch_devs_mask *devs_may_alloc,
+ unsigned nr_replicas,
+ unsigned *nr_effective,
+ bool *have_cache,
+ enum alloc_reserve reserve,
+ unsigned flags,
+ struct closure *cl)
+{
+ return bch2_trans_do(c, NULL, NULL, 0,
+ bch2_bucket_alloc_set_trans(&trans, ptrs, stripe,
+ devs_may_alloc, nr_replicas,
+ nr_effective, have_cache, reserve,
+ flags, cl));
+}
+
/* Allocate from stripes: */
/*
@@ -824,7 +861,7 @@ static void get_buckets_from_writepoint(struct bch_fs *c,
wp->ptrs = ptrs_skip;
}
-static int open_bucket_add_buckets(struct bch_fs *c,
+static int open_bucket_add_buckets(struct btree_trans *trans,
struct open_buckets *ptrs,
struct write_point *wp,
struct bch_devs_list *devs_have,
@@ -837,6 +874,7 @@ static int open_bucket_add_buckets(struct bch_fs *c,
unsigned flags,
struct closure *_cl)
{
+ struct bch_fs *c = trans->c;
struct bch_devs_mask devs;
struct open_bucket *ob;
struct closure *cl = NULL;
@@ -868,7 +906,8 @@ static int open_bucket_add_buckets(struct bch_fs *c,
target, erasure_code,
nr_replicas, nr_effective,
have_cache, flags, _cl);
- if (bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
+ if (bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
+ bch2_err_matches(ret, BCH_ERR_freelist_empty) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
return ret;
if (*nr_effective >= nr_replicas)
@@ -887,10 +926,11 @@ retry_blocking:
* Try nonblocking first, so that if one device is full we'll try from
* other devices:
*/
- ret = bch2_bucket_alloc_set(c, ptrs, &wp->stripe, &devs,
+ ret = bch2_bucket_alloc_set_trans(trans, ptrs, &wp->stripe, &devs,
nr_replicas, nr_effective, have_cache,
reserve, flags, cl);
if (ret &&
+ !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_insufficient_devices) &&
!cl && _cl) {
cl = _cl;
@@ -1010,15 +1050,25 @@ static bool try_decrease_writepoints(struct bch_fs *c,
return true;
}
-static struct write_point *writepoint_find(struct bch_fs *c,
+static void bch2_trans_mutex_lock(struct btree_trans *trans,
+ struct mutex *lock)
+{
+ if (!mutex_trylock(lock)) {
+ bch2_trans_unlock(trans);
+ mutex_lock(lock);
+ }
+}
+
+static struct write_point *writepoint_find(struct btree_trans *trans,
unsigned long write_point)
{
+ struct bch_fs *c = trans->c;
struct write_point *wp, *oldest;
struct hlist_head *head;
if (!(write_point & 1UL)) {
wp = (struct write_point *) write_point;
- mutex_lock(&wp->lock);
+ bch2_trans_mutex_lock(trans, &wp->lock);
return wp;
}
@@ -1027,7 +1077,7 @@ restart_find:
wp = __writepoint_find(head, write_point);
if (wp) {
lock_wp:
- mutex_lock(&wp->lock);
+ bch2_trans_mutex_lock(trans, &wp->lock);
if (wp->write_point == write_point)
goto out;
mutex_unlock(&wp->lock);
@@ -1040,8 +1090,8 @@ restart_find_oldest:
if (!oldest || time_before64(wp->last_used, oldest->last_used))
oldest = wp;
- mutex_lock(&oldest->lock);
- mutex_lock(&c->write_points_hash_lock);
+ bch2_trans_mutex_lock(trans, &oldest->lock);
+ bch2_trans_mutex_lock(trans, &c->write_points_hash_lock);
if (oldest >= c->write_points + c->write_points_nr ||
try_increase_writepoints(c)) {
mutex_unlock(&c->write_points_hash_lock);
@@ -1069,7 +1119,7 @@ out:
/*
* Get us an open_bucket we can allocate from, return with it locked:
*/
-struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *trans,
unsigned target,
unsigned erasure_code,
struct write_point_specifier write_point,
@@ -1080,6 +1130,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
unsigned flags,
struct closure *cl)
{
+ struct bch_fs *c = trans->c;
struct write_point *wp;
struct open_bucket *ob;
struct open_buckets ptrs;
@@ -1099,7 +1150,7 @@ retry:
write_points_nr = c->write_points_nr;
have_cache = false;
- wp = writepoint_find(c, write_point.v);
+ wp = writepoint_find(trans, write_point.v);
if (wp->data_type == BCH_DATA_user)
ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
@@ -1109,21 +1160,22 @@ retry:
have_cache = true;
if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
- ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, cl);
} else {
- ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
ob_flags, NULL);
- if (!ret)
+ if (!ret ||
+ bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto alloc_done;
- ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have,
+ ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
0, erasure_code,
nr_replicas, &nr_effective,
&have_cache, reserve,
@@ -1180,6 +1232,32 @@ err:
return ERR_PTR(ret);
}
+struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
+ unsigned target,
+ unsigned erasure_code,
+ struct write_point_specifier write_point,
+ struct bch_devs_list *devs_have,
+ unsigned nr_replicas,
+ unsigned nr_replicas_required,
+ enum alloc_reserve reserve,
+ unsigned flags,
+ struct closure *cl)
+{
+ struct write_point *wp;
+
+ bch2_trans_do(c, NULL, NULL, 0,
+ PTR_ERR_OR_ZERO(wp = bch2_alloc_sectors_start_trans(&trans, target,
+ erasure_code,
+ write_point,
+ devs_have,
+ nr_replicas,
+ nr_replicas_required,
+ reserve,
+ flags, cl)));
+ return wp;
+
+}
+
struct bch_extent_ptr bch2_ob_ptr(struct bch_fs *c, struct open_bucket *ob)
{
struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
diff --git a/libbcachefs/alloc_foreground.h b/libbcachefs/alloc_foreground.h
index 8bc78877..6de63a35 100644
--- a/libbcachefs/alloc_foreground.h
+++ b/libbcachefs/alloc_foreground.h
@@ -136,6 +136,14 @@ int bch2_bucket_alloc_set(struct bch_fs *, struct open_buckets *,
unsigned, unsigned *, bool *, enum alloc_reserve,
unsigned, struct closure *);
+struct write_point *bch2_alloc_sectors_start_trans(struct btree_trans *,
+ unsigned, unsigned,
+ struct write_point_specifier,
+ struct bch_devs_list *,
+ unsigned, unsigned,
+ enum alloc_reserve,
+ unsigned,
+ struct closure *);
struct write_point *bch2_alloc_sectors_start(struct bch_fs *,
unsigned, unsigned,
struct write_point_specifier,
diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c
index 5a46b25b..029b1ec1 100644
--- a/libbcachefs/backpointers.c
+++ b/libbcachefs/backpointers.c
@@ -492,7 +492,7 @@ static void backpointer_not_found(struct btree_trans *trans,
prt_printf(&buf, "\n ");
bch2_bkey_val_to_text(&buf, c, k);
if (!test_bit(BCH_FS_CHECK_BACKPOINTERS_DONE, &c->flags))
- bch_err(c, "%s", buf.buf);
+ bch_err_ratelimited(c, "%s", buf.buf);
else
bch2_trans_inconsistent(trans, "%s", buf.buf);
@@ -526,9 +526,21 @@ struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
if (extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
return k;
- backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
-
bch2_trans_iter_exit(trans, iter);
+
+ if (bp.level) {
+ /*
+ * If a backpointer for a btree node wasn't found, it may be
+ * because it was overwritten by a new btree node that hasn't
+ * been written out yet - backpointer_get_node() checks for
+ * this:
+ */
+ bch2_backpointer_get_node(trans, iter, bucket, bp_offset, bp);
+ bch2_trans_iter_exit(trans, iter);
+ return bkey_s_c_null;
+ }
+
+ backpointer_not_found(trans, bucket, bp_offset, bp, k, "extent");
return bkey_s_c_null;
}
@@ -540,7 +552,6 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
struct btree *b;
- struct bkey_s_c k;
BUG_ON(!bp.level);
@@ -551,22 +562,24 @@ struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
bp.level - 1,
0);
b = bch2_btree_iter_peek_node(iter);
- if (IS_ERR(b)) {
- bch2_trans_iter_exit(trans, iter);
- return b;
- }
+ if (IS_ERR(b))
+ goto err;
if (extent_matches_bp(c, bp.btree_id, bp.level,
bkey_i_to_s_c(&b->key),
bucket, bp))
return b;
- if (!btree_node_will_make_reachable(b))
- backpointer_not_found(trans, bucket, bp_offset,
- bp, k, "btree node");
-
+ if (btree_node_will_make_reachable(b)) {
+ b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
+ } else {
+ backpointer_not_found(trans, bucket, bp_offset, bp,
+ bkey_i_to_s_c(&b->key), "btree node");
+ b = NULL;
+ }
+err:
bch2_trans_iter_exit(trans, iter);
- return NULL;
+ return b;
}
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
@@ -829,6 +842,8 @@ static int check_one_backpointer(struct btree_trans *trans,
k = bch2_backpointer_get_key(trans, &iter, bucket, *bp_offset, bp);
ret = bkey_err(k);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ return 0;
if (ret)
return ret;
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 8ffdb4de..a5bf8087 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -319,8 +319,6 @@ BCH_DEBUG_PARAMS_DEBUG()
#undef BCH_DEBUG_PARAM
#endif
-#define BCH_LOCK_TIME_NR 128
-
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
@@ -531,9 +529,13 @@ struct btree_debug {
unsigned id;
};
-struct lock_held_stats {
- struct time_stats times[BCH_LOCK_TIME_NR];
- const char *names[BCH_LOCK_TIME_NR];
+#define BCH_TRANSACTIONS_NR 128
+
+struct btree_transaction_stats {
+ struct mutex lock;
+ struct time_stats lock_hold_times;
+ unsigned nr_max_paths;
+ char *max_paths_text;
};
struct bch_fs_pcpu {
@@ -930,7 +932,8 @@ struct bch_fs {
struct time_stats times[BCH_TIME_STAT_NR];
- struct lock_held_stats lock_held_stats;
+ const char *btree_transaction_fns[BCH_TRANSACTIONS_NR];
+ struct btree_transaction_stats btree_transaction_stats[BCH_TRANSACTIONS_NR];
};
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
diff --git a/libbcachefs/bkey.c b/libbcachefs/bkey.c
index cc068963..d348175e 100644
--- a/libbcachefs/bkey.c
+++ b/libbcachefs/bkey.c
@@ -19,33 +19,49 @@ const struct bkey_format bch2_bkey_format_current = BKEY_FORMAT_CURRENT;
struct bkey __bch2_bkey_unpack_key(const struct bkey_format *,
const struct bkey_packed *);
-void bch2_to_binary(char *out, const u64 *p, unsigned nr_bits)
+void bch2_bkey_packed_to_binary_text(struct printbuf *out,
+ const struct bkey_format *f,
+ const struct bkey_packed *k)
{
- unsigned bit = high_bit_offset, done = 0;
+ const u64 *p = high_word(f, k);
+ unsigned word_bits = 64 - high_bit_offset;
+ unsigned nr_key_bits = bkey_format_key_bits(f) + high_bit_offset;
+ u64 v = *p & (~0ULL >> high_bit_offset);
+
+ if (!nr_key_bits) {
+ prt_str(out, "(empty)");
+ return;
+ }
while (1) {
- while (bit < 64) {
- if (done && !(done % 8))
- *out++ = ' ';
- *out++ = *p & (1ULL << (63 - bit)) ? '1' : '0';
- bit++;
- done++;
- if (done == nr_bits) {
- *out++ = '\0';
- return;
- }
+ unsigned next_key_bits = nr_key_bits;
+
+ if (nr_key_bits < 64) {
+ v >>= 64 - nr_key_bits;
+ next_key_bits = 0;
+ } else {
+ next_key_bits -= 64;
}
+ bch2_prt_u64_binary(out, v, min(word_bits, nr_key_bits));
+
+ if (!next_key_bits)
+ break;
+
+ prt_char(out, ' ');
+
p = next_word(p);
- bit = 0;
+ v = *p;
+ word_bits = 64;
+ nr_key_bits = next_key_bits;
}
}
#ifdef CONFIG_BCACHEFS_DEBUG
static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
- const struct bkey *unpacked,
- const struct bkey_format *format)
+ const struct bkey *unpacked,
+ const struct bkey_format *format)
{
struct bkey tmp;
@@ -57,23 +73,35 @@ static void bch2_bkey_pack_verify(const struct bkey_packed *packed,
tmp = __bch2_bkey_unpack_key(format, packed);
if (memcmp(&tmp, unpacked, sizeof(struct bkey))) {
- struct printbuf buf1 = PRINTBUF;
- struct printbuf buf2 = PRINTBUF;
- char buf3[160], buf4[160];
+ struct printbuf buf = PRINTBUF;
- bch2_bkey_to_text(&buf1, unpacked);
- bch2_bkey_to_text(&buf2, &tmp);
- bch2_to_binary(buf3, (void *) unpacked, 80);
- bch2_to_binary(buf4, high_word(format, packed), 80);
-
- panic("keys differ: format u64s %u fields %u %u %u %u %u\n%s\n%s\n%s\n%s\n",
+ prt_printf(&buf, "keys differ: format u64s %u fields %u %u %u %u %u\n",
format->key_u64s,
format->bits_per_field[0],
format->bits_per_field[1],
format->bits_per_field[2],
format->bits_per_field[3],
- format->bits_per_field[4],
- buf1.buf, buf2.buf, buf3, buf4);
+ format->bits_per_field[4]);
+
+ prt_printf(&buf, "compiled unpack: ");
+ bch2_bkey_to_text(&buf, unpacked);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "c unpack: ");
+ bch2_bkey_to_text(&buf, &tmp);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "compiled unpack: ");
+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+ (struct bkey_packed *) unpacked);
+ prt_newline(&buf);
+
+ prt_printf(&buf, "c unpack: ");
+ bch2_bkey_packed_to_binary_text(&buf, &bch2_bkey_format_current,
+ (struct bkey_packed *) &tmp);
+ prt_newline(&buf);
+
+ panic("%s", buf.buf);
}
}
diff --git a/libbcachefs/bkey.h b/libbcachefs/bkey.h
index 7dee3d8e..df9fb859 100644
--- a/libbcachefs/bkey.h
+++ b/libbcachefs/bkey.h
@@ -12,7 +12,9 @@
#define HAVE_BCACHEFS_COMPILED_UNPACK 1
#endif
-void bch2_to_binary(char *, const u64 *, unsigned);
+void bch2_bkey_packed_to_binary_text(struct printbuf *,
+ const struct bkey_format *,
+ const struct bkey_packed *);
/* bkey with split value, const */
struct bkey_s_c {
@@ -42,12 +44,15 @@ static inline size_t bkey_val_bytes(const struct bkey *k)
static inline void set_bkey_val_u64s(struct bkey *k, unsigned val_u64s)
{
- k->u64s = BKEY_U64s + val_u64s;
+ unsigned u64s = BKEY_U64s + val_u64s;
+
+ BUG_ON(u64s > U8_MAX);
+ k->u64s = u64s;
}
static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes)
{
- k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64));
+ set_bkey_val_u64s(k, DIV_ROUND_UP(bytes, sizeof(u64)));
}
#define bkey_val_end(_k) ((void *) (((u64 *) (_k).v) + bkey_val_u64s((_k).k)))
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index ae731b3a..8aad87ea 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -616,7 +616,6 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
(u64 *) vstruct_end(i) - (u64 *) k);
i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - shift);
set_btree_bset_end(b, t);
- bch2_bset_set_no_aux_tree(b, t);
}
for (k = i->start; k != vstruct_last(i); k = bkey_next(k))
@@ -626,10 +625,14 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
if (k != vstruct_last(i)) {
i->u64s = cpu_to_le16((u64 *) k - (u64 *) i->start);
set_btree_bset_end(b, t);
- bch2_bset_set_no_aux_tree(b, t);
}
}
+ /*
+ * Always rebuild search trees: eytzinger search tree nodes directly
+ * depend on the values of min/max key:
+ */
+ bch2_bset_set_no_aux_tree(b, b->set);
bch2_btree_build_aux_trees(b);
for_each_btree_node_key_unpack(b, k, &iter, &unpacked) {
@@ -778,8 +781,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
}
static int validate_bset_keys(struct bch_fs *c, struct btree *b,
- struct bset *i, unsigned *whiteout_u64s,
- int write, bool have_retry)
+ struct bset *i, int write, bool have_retry)
{
unsigned version = le16_to_cpu(i->version);
struct bkey_packed *k, *prev = NULL;
@@ -915,7 +917,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
}
while (b->written < (ptr_written ?: btree_sectors(c))) {
- unsigned sectors, whiteout_u64s = 0;
+ unsigned sectors;
struct nonce nonce;
struct bch_csum csum;
bool first = !b->written;
@@ -984,8 +986,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (!b->written)
btree_node_set_format(b, b->data->format);
- ret = validate_bset_keys(c, b, i, &whiteout_u64s,
- READ, have_retry);
+ ret = validate_bset_keys(c, b, i, READ, have_retry);
if (ret)
goto fsck_err;
@@ -1011,11 +1012,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (blacklisted && !first)
continue;
- sort_iter_add(iter, i->start,
- vstruct_idx(i, whiteout_u64s));
-
sort_iter_add(iter,
- vstruct_idx(i, whiteout_u64s),
+ vstruct_idx(i, 0),
vstruct_last(i));
nonblacklisted_written = b->written;
@@ -1745,7 +1743,6 @@ static void btree_node_write_endio(struct bio *bio)
static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
struct bset *i, unsigned sectors)
{
- unsigned whiteout_u64s = 0;
struct printbuf buf = PRINTBUF;
int ret;
@@ -1758,7 +1755,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (ret)
return ret;
- ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
+ ret = validate_bset_keys(c, b, i, WRITE, false) ?:
validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false);
if (ret) {
bch2_inconsistent_error(c);
diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c
index 04a61318..1d4b9fde 100644
--- a/libbcachefs/btree_iter.c
+++ b/libbcachefs/btree_iter.c
@@ -1418,16 +1418,16 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
if (unlikely(ret))
goto err;
- mark_btree_node_locked(trans, path, level, lock_type);
- btree_path_level_init(trans, path, b);
-
if (likely(replay_done && tmp.k->k.type == KEY_TYPE_btree_ptr_v2) &&
unlikely(b != btree_node_mem_ptr(tmp.k)))
btree_node_mem_ptr_set(trans, path, level + 1, b);
if (btree_node_read_locked(path, level + 1))
btree_node_unlock(trans, path, level + 1);
+
+ mark_btree_node_locked(trans, path, level, lock_type);
path->level = level;
+ btree_path_level_init(trans, path, b);
bch2_btree_path_verify_locks(path);
err:
@@ -1872,42 +1872,69 @@ void bch2_dump_trans_updates(struct btree_trans *trans)
printbuf_exit(&buf);
}
-noinline __cold
-void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+void bch2_btree_path_to_text(struct printbuf *out, struct btree_path *path)
+{
+ prt_printf(out, "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos ",
+ path->idx, path->ref, path->intent_ref,
+ path->preserve ? 'P' : ' ',
+ path->should_be_locked ? 'S' : ' ',
+ bch2_btree_ids[path->btree_id],
+ path->level);
+ bch2_bpos_to_text(out, path->pos);
+
+ prt_printf(out, " locks %u", path->nodes_locked);
+#ifdef CONFIG_BCACHEFS_DEBUG
+ prt_printf(out, " %pS", (void *) path->ip_allocated);
+#endif
+ prt_newline(out);
+}
+
+void bch2_trans_paths_to_text(struct printbuf *out, struct btree_trans *trans)
{
struct btree_path *path;
- struct printbuf buf = PRINTBUF;
unsigned idx;
- trans_for_each_path_inorder(trans, path, idx) {
- printbuf_reset(&buf);
+ trans_for_each_path_inorder(trans, path, idx)
+ bch2_btree_path_to_text(out, path);
+}
- bch2_bpos_to_text(&buf, path->pos);
+noinline __cold
+void bch2_dump_trans_paths_updates(struct btree_trans *trans)
+{
+ struct printbuf buf = PRINTBUF;
- printk(KERN_ERR "path: idx %2u ref %u:%u %c %c btree=%s l=%u pos %s locks %u %pS\n",
- path->idx, path->ref, path->intent_ref,
- path->preserve ? 'P' : ' ',
- path->should_be_locked ? 'S' : ' ',
- bch2_btree_ids[path->btree_id],
- path->level,
- buf.buf,
- path->nodes_locked,
-#ifdef CONFIG_BCACHEFS_DEBUG
- (void *) path->ip_allocated
-#else
- NULL
-#endif
- );
- }
+ bch2_trans_paths_to_text(&buf, trans);
+ printk(KERN_ERR "%s", buf.buf);
printbuf_exit(&buf);
bch2_dump_trans_updates(trans);
}
+noinline
+static void bch2_trans_update_max_paths(struct btree_trans *trans)
+{
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
+ struct printbuf buf = PRINTBUF;
+
+ bch2_trans_paths_to_text(&buf, trans);
+
+ if (!buf.allocation_failure) {
+ mutex_lock(&s->lock);
+ if (s->nr_max_paths < hweight64(trans->paths_allocated)) {
+ s->nr_max_paths = hweight64(trans->paths_allocated);
+ swap(s->max_paths_text, buf.buf);
+ }
+ mutex_unlock(&s->lock);
+ }
+
+ printbuf_exit(&buf);
+}
+
static struct btree_path *btree_path_alloc(struct btree_trans *trans,
struct btree_path *pos)
{
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
struct btree_path *path;
unsigned idx;
@@ -1920,6 +1947,9 @@ static struct btree_path *btree_path_alloc(struct btree_trans *trans,
idx = __ffs64(~trans->paths_allocated);
trans->paths_allocated |= 1ULL << idx;
+ if (s && unlikely(hweight64(trans->paths_allocated) > s->nr_max_paths))
+ bch2_trans_update_max_paths(trans);
+
path = &trans->paths[idx];
path->idx = idx;
@@ -2013,12 +2043,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
struct bkey_s_c k;
+ EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
+ EBUG_ON(!btree_node_locked(path, path->level));
+
if (!path->cached) {
struct btree_path_level *l = path_l(path);
struct bkey_packed *_k;
- EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
-
_k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
@@ -2033,7 +2064,6 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
(path->btree_id != ck->key.btree_id ||
bkey_cmp(path->pos, ck->key.pos)));
EBUG_ON(!ck || !ck->valid);
- EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
*u = ck->k->k;
k = bkey_i_to_s_c(ck->k);
@@ -2288,7 +2318,7 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
* bkey_s_c_null:
*/
static noinline
-struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+struct bkey_s_c __btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
{
struct btree_trans *trans = iter->trans;
struct bch_fs *c = trans->c;
@@ -2317,6 +2347,15 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
}
+static noinline
+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+{
+ struct bkey_s_c ret = __btree_trans_peek_key_cache(iter, pos);
+ int err = bkey_err(ret) ?: bch2_btree_path_relock(iter->trans, iter->path, _THIS_IP_);
+
+ return err ? bkey_s_c_err(err) : ret;
+}
+
static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
{
struct btree_trans *trans = iter->trans;
@@ -2347,15 +2386,12 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
k.k &&
(k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
- ret = bkey_err(k2);
+ k = k2;
+ ret = bkey_err(k);
if (ret) {
- k = k2;
bch2_btree_iter_set_pos(iter, iter->pos);
goto out;
}
-
- k = k2;
- iter->k = *k.k;
}
if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
@@ -2803,8 +2839,10 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
btree_iter_ip_allocated(iter));
ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
if ((iter->flags & BTREE_ITER_CACHED) ||
!(iter->flags & (BTREE_ITER_IS_EXTENTS|BTREE_ITER_FILTER_SNAPSHOTS))) {
@@ -2828,13 +2866,11 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
}
if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
- (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
- if (bkey_err(k)) {
- goto out_no_locked;
- } else {
+ (k = __btree_trans_peek_key_cache(iter, iter->pos)).k) {
+ if (!bkey_err(k))
iter->k = *k.k;
- goto out;
- }
+ /* We're not returning a key from iter->path: */
+ goto out_no_locked;
}
k = bch2_btree_path_peek_slot(iter->path, &iter->k);
@@ -2862,11 +2898,14 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
struct bpos pos = iter->pos;
k = bch2_btree_iter_peek(iter);
- iter->pos = pos;
+ if (unlikely(bkey_err(k)))
+ bch2_btree_iter_set_pos(iter, pos);
+ else
+ iter->pos = pos;
}
if (unlikely(bkey_err(k)))
- return k;
+ goto out_no_locked;
next = k.k ? bkey_start_pos(k.k) : POS_MAX;
@@ -3195,6 +3234,7 @@ u32 bch2_trans_begin(struct btree_trans *trans)
bch2_trans_reset_updates(trans);
+ trans->restart_count++;
trans->mem_top = 0;
if (trans->fs_usage_deltas) {
@@ -3245,10 +3285,10 @@ u32 bch2_trans_begin(struct btree_trans *trans)
void bch2_trans_verify_not_restarted(struct btree_trans *trans, u32 restart_count)
{
- bch2_trans_inconsistent_on(trans_was_restarted(trans, restart_count), trans,
- "trans->restart_count %u, should be %u, last restarted by %ps\n",
- trans->restart_count, restart_count,
- (void *) trans->last_restarted_ip);
+ if (trans_was_restarted(trans, restart_count))
+ panic("trans->restart_count %u, should be %u, last restarted by %pS\n",
+ trans->restart_count, restart_count,
+ (void *) trans->last_restarted_ip);
}
static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
@@ -3269,6 +3309,22 @@ static void bch2_trans_alloc_paths(struct btree_trans *trans, struct bch_fs *c)
trans->updates = p; p += updates_bytes;
}
+static inline unsigned bch2_trans_get_fn_idx(struct btree_trans *trans, struct bch_fs *c,
+ const char *fn)
+{
+ unsigned i;
+
+ for (i = 0; i < ARRAY_SIZE(c->btree_transaction_fns); i++)
+ if (!c->btree_transaction_fns[i] ||
+ c->btree_transaction_fns[i] == fn) {
+ c->btree_transaction_fns[i] = fn;
+ return i;
+ }
+
+ pr_warn_once("BCH_TRANSACTIONS_NR not big enough!");
+ return i;
+}
+
void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
unsigned expected_nr_iters,
size_t expected_mem_bytes,
@@ -3284,15 +3340,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
trans->fn = fn;
trans->last_begin_time = ktime_get_ns();
trans->task = current;
-
- while (c->lock_held_stats.names[trans->lock_name_idx] != fn
- && c->lock_held_stats.names[trans->lock_name_idx] != 0)
- trans->lock_name_idx++;
-
- if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
- pr_warn_once("lock_times array not big enough!");
- else
- c->lock_held_stats.names[trans->lock_name_idx] = fn;
+ trans->fn_idx = bch2_trans_get_fn_idx(trans, c, fn);
bch2_trans_alloc_paths(trans, c);
@@ -3463,9 +3511,12 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
int bch2_fs_btree_iter_init(struct bch_fs *c)
{
- unsigned nr = BTREE_ITER_MAX;
+ unsigned i, nr = BTREE_ITER_MAX;
int ret;
+ for (i = 0; i < ARRAY_SIZE(c->btree_transaction_stats); i++)
+ mutex_init(&c->btree_transaction_stats[i].lock);
+
INIT_LIST_HEAD(&c->btree_trans_list);
mutex_init(&c->btree_trans_lock);
diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h
index f38fd25b..6ad28ff6 100644
--- a/libbcachefs/btree_iter.h
+++ b/libbcachefs/btree_iter.h
@@ -182,7 +182,6 @@ static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int er
BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));
trans->restarted = err;
- trans->restart_count++;
return -err;
}
@@ -368,7 +367,7 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
- if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX) {
+ if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX / 2) {
trace_trans_restart_too_many_iters(trans, _THIS_IP_);
return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
}
@@ -392,13 +391,17 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
#define lockrestart_do(_trans, _do) \
({ \
+ u32 _restart_count; \
int _ret; \
\
do { \
- bch2_trans_begin(_trans); \
+ _restart_count = bch2_trans_begin(_trans); \
_ret = (_do); \
} while (bch2_err_matches(_ret, BCH_ERR_transaction_restart)); \
\
+ if (!_ret) \
+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
+ \
_ret; \
})
@@ -439,7 +442,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
(_start), (_flags)); \
\
while (1) { \
- bch2_trans_begin(_trans); \
+ u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags)); \
if (!(_k).k) { \
_ret = 0; \
@@ -451,6 +454,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
continue; \
if (_ret) \
break; \
+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_advance(&(_iter))) \
break; \
} \
@@ -468,7 +472,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
(_start), (_flags)); \
\
while (1) { \
- bch2_trans_begin(_trans); \
+ u32 _restart_count = bch2_trans_begin(_trans); \
(_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
if (!(_k).k) { \
_ret = 0; \
@@ -480,6 +484,7 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
continue; \
if (_ret) \
break; \
+ bch2_trans_verify_not_restarted(_trans, _restart_count);\
if (!bch2_btree_iter_rewind(&(_iter))) \
break; \
} \
@@ -535,6 +540,8 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
/* new multiple iterator interface: */
void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
+void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
+void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *,
diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c
index fa90581f..38b16f95 100644
--- a/libbcachefs/btree_key_cache.c
+++ b/libbcachefs/btree_key_cache.c
@@ -631,11 +631,22 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
void bch2_btree_key_cache_drop(struct btree_trans *trans,
struct btree_path *path)
{
+ struct bch_fs *c = trans->c;
struct bkey_cached *ck = (void *) path->l[0].b;
- ck->valid = false;
+ BUG_ON(!ck->valid);
- BUG_ON(test_bit(BKEY_CACHED_DIRTY, &ck->flags));
+ /*
+ * We just did an update to the btree, bypassing the key cache: the key
+ * cache key is now stale and must be dropped, even if dirty:
+ */
+ if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+ clear_bit(BKEY_CACHED_DIRTY, &ck->flags);
+ atomic_long_dec(&c->btree_key_cache.nr_dirty);
+ bch2_journal_pin_drop(&c->journal, &ck->journal);
+ }
+
+ ck->valid = false;
}
static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index c3f3cb87..205c6b59 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -115,6 +115,26 @@ btree_lock_want(struct btree_path *path, int level)
return BTREE_NODE_UNLOCKED;
}
+static inline struct btree_transaction_stats *btree_trans_stats(struct btree_trans *trans)
+{
+ return trans->fn_idx < ARRAY_SIZE(trans->c->btree_transaction_stats)
+ ? &trans->c->btree_transaction_stats[trans->fn_idx]
+ : NULL;
+}
+
+static void btree_trans_lock_hold_time_update(struct btree_trans *trans,
+ struct btree_path *path, unsigned level)
+{
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ struct btree_transaction_stats *s = btree_trans_stats(trans);
+
+ if (s)
+ __bch2_time_stats_update(&s->lock_hold_times,
+ path->l[level].lock_taken_time,
+ ktime_get_ns());
+#endif
+}
+
static inline void btree_node_unlock(struct btree_trans *trans,
struct btree_path *path, unsigned level)
{
@@ -124,15 +144,7 @@ static inline void btree_node_unlock(struct btree_trans *trans,
if (lock_type != BTREE_NODE_UNLOCKED) {
six_unlock_type(&path->l[level].b->c.lock, lock_type);
-#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
- if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
- struct bch_fs *c = trans->c;
-
- __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
- path->l[level].lock_taken_time,
- ktime_get_ns());
- }
-#endif
+ btree_trans_lock_hold_time_update(trans, path, level);
}
mark_btree_node_unlocked(path, level);
}
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 1ff99917..21d76181 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -392,6 +392,7 @@ struct btree_trans {
struct task_struct *task;
int srcu_idx;
+ u8 fn_idx;
u8 nr_sorted;
u8 nr_updates;
u8 traverse_all_idx;
@@ -432,7 +433,6 @@ struct btree_trans {
unsigned journal_u64s;
unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
- int lock_name_idx;
};
#define BTREE_FLAGS() \
diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c
index e4138614..0409737f 100644
--- a/libbcachefs/btree_update_interior.c
+++ b/libbcachefs/btree_update_interior.c
@@ -178,12 +178,13 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
six_unlock_intent(&b->c.lock);
}
-static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
+static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
struct disk_reservation *res,
struct closure *cl,
bool interior_node,
unsigned flags)
{
+ struct bch_fs *c = trans->c;
struct write_point *wp;
struct btree *b;
__BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp;
@@ -213,7 +214,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
mutex_unlock(&c->btree_reserve_cache_lock);
retry:
- wp = bch2_alloc_sectors_start(c,
+ wp = bch2_alloc_sectors_start_trans(trans,
c->opts.metadata_target ?:
c->opts.foreground_target,
0,
@@ -412,18 +413,16 @@ static void bch2_btree_reserve_put(struct btree_update *as)
}
}
-static int bch2_btree_reserve_get(struct btree_update *as,
+static int bch2_btree_reserve_get(struct btree_trans *trans,
+ struct btree_update *as,
unsigned nr_nodes[2],
- unsigned flags)
+ unsigned flags,
+ struct closure *cl)
{
struct bch_fs *c = as->c;
- struct closure cl;
struct btree *b;
unsigned interior;
- int ret;
-
- closure_init_stack(&cl);
-retry:
+ int ret = 0;
BUG_ON(nr_nodes[0] + nr_nodes[1] > BTREE_RESERVE_MAX);
@@ -434,18 +433,17 @@ retry:
* BTREE_INSERT_NOWAIT only applies to btree node allocation, not
* blocking on this lock:
*/
- ret = bch2_btree_cache_cannibalize_lock(c, &cl);
+ ret = bch2_btree_cache_cannibalize_lock(c, cl);
if (ret)
- goto err;
+ return ret;
for (interior = 0; interior < 2; interior++) {
struct prealloc_nodes *p = as->prealloc_nodes + interior;
while (p->nr < nr_nodes[interior]) {
- b = __bch2_btree_node_alloc(c, &as->disk_res,
- flags & BTREE_INSERT_NOWAIT
- ? NULL : &cl,
- interior, flags);
+ b = __bch2_btree_node_alloc(trans, &as->disk_res,
+ flags & BTREE_INSERT_NOWAIT ? NULL : cl,
+ interior, flags);
if (IS_ERR(b)) {
ret = PTR_ERR(b);
goto err;
@@ -454,18 +452,8 @@ retry:
p->b[p->nr++] = b;
}
}
-
- bch2_btree_cache_cannibalize_unlock(c);
- closure_sync(&cl);
- return 0;
err:
bch2_btree_cache_cannibalize_unlock(c);
- closure_sync(&cl);
-
- if (ret == -EAGAIN)
- goto retry;
-
- trace_btree_reserve_get_fail(c, nr_nodes[0] + nr_nodes[1], &cl);
return ret;
}
@@ -980,6 +968,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
unsigned update_level = level;
int journal_flags = flags & JOURNAL_WATERMARK_MASK;
int ret = 0;
+ u32 restart_count = trans->restart_count;
BUG_ON(!path->should_be_locked);
@@ -1053,16 +1042,24 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- bch2_trans_unlock(trans);
-
ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
BTREE_UPDATE_JOURNAL_RES,
- journal_flags);
+ journal_flags|JOURNAL_RES_GET_NONBLOCK);
if (ret) {
- bch2_btree_update_free(as);
- trace_trans_restart_journal_preres_get(trans, _RET_IP_);
- ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
- return ERR_PTR(ret);
+ bch2_trans_unlock(trans);
+
+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+ BTREE_UPDATE_JOURNAL_RES,
+ journal_flags);
+ if (ret) {
+ trace_trans_restart_journal_preres_get(trans, _RET_IP_);
+ ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_journal_preres_get);
+ goto err;
+ }
+
+ ret = bch2_trans_relock(trans);
+ if (ret)
+ goto err;
}
ret = bch2_disk_reservation_get(c, &as->disk_res,
@@ -1072,14 +1069,32 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
if (ret)
goto err;
- ret = bch2_btree_reserve_get(as, nr_nodes, flags);
- if (ret)
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, NULL);
+ if (ret == -EAGAIN ||
+ ret == -ENOMEM) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+
+ bch2_trans_unlock(trans);
+
+ do {
+ ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl);
+ closure_sync(&cl);
+ } while (ret == -EAGAIN);
+ }
+
+ if (ret) {
+ trace_btree_reserve_get_fail(trans->fn, _RET_IP_,
+ nr_nodes[0] + nr_nodes[1]);
goto err;
+ }
ret = bch2_trans_relock(trans);
if (ret)
goto err;
+ bch2_trans_verify_not_restarted(trans, restart_count);
return as;
err:
bch2_btree_update_free(as);
diff --git a/libbcachefs/debug.c b/libbcachefs/debug.c
index cd37a101..f35e714e 100644
--- a/libbcachefs/debug.c
+++ b/libbcachefs/debug.c
@@ -199,7 +199,7 @@ struct dump_iter {
ssize_t ret; /* bytes read so far */
};
-static int flush_buf(struct dump_iter *i)
+static ssize_t flush_buf(struct dump_iter *i)
{
if (i->buf.pos) {
size_t bytes = min_t(size_t, i->buf.pos, i->size);
@@ -215,7 +215,7 @@ static int flush_buf(struct dump_iter *i)
memmove(i->buf.buf, i->buf.buf + bytes, i->buf.pos);
}
- return 0;
+ return i->size ? 0 : i->ret;
}
static int bch2_dump_open(struct inode *inode, struct file *file)
@@ -253,7 +253,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
@@ -261,14 +261,11 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0);
- err = for_each_btree_key2(&trans, iter, i->id, i->from,
+ ret = for_each_btree_key2(&trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
- err = flush_buf(i);
- if (err)
- break;
-
- if (!i->size)
+ ret = flush_buf(i);
+ if (ret)
break;
bch2_bkey_val_to_text(&i->buf, i->c, k);
@@ -277,12 +274,12 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
}));
i->from = iter.pos;
- if (!err)
- err = flush_buf(i);
+ if (!ret)
+ ret = flush_buf(i);
bch2_trans_exit(&trans);
- return err ?: i->ret;
+ return ret ?: i->ret;
}
static const struct file_operations btree_debug_ops = {
@@ -299,43 +296,39 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct btree *b;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
- if (!i->size || !bpos_cmp(SPOS_MAX, i->from))
+ if (!bpos_cmp(SPOS_MAX, i->from))
return i->ret;
bch2_trans_init(&trans, i->c, 0, 0);
- for_each_btree_node(&trans, iter, i->id, i->from, 0, b, err) {
- bch2_btree_node_to_text(&i->buf, i->c, b);
- err = flush_buf(i);
- if (err)
+ for_each_btree_node(&trans, iter, i->id, i->from, 0, b, ret) {
+ ret = flush_buf(i);
+ if (ret)
break;
- /*
- * can't easily correctly restart a btree node traversal across
- * all nodes, meh
- */
+ bch2_btree_node_to_text(&i->buf, i->c, b);
i->from = bpos_cmp(SPOS_MAX, b->key.k.p)
? bpos_successor(b->key.k.p)
: b->key.k.p;
-
- if (!i->size)
- break;
}
bch2_trans_iter_exit(&trans, &iter);
bch2_trans_exit(&trans);
- return err < 0 ? err : i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations btree_format_debug_ops = {
@@ -352,33 +345,27 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
struct btree_trans trans;
struct btree_iter iter;
struct bkey_s_c k;
- int err;
+ ssize_t ret;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- return i->ret;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
bch2_trans_init(&trans, i->c, 0, 0);
- err = for_each_btree_key2(&trans, iter, i->id, i->from,
+ ret = for_each_btree_key2(&trans, iter, i->id, i->from,
BTREE_ITER_PREFETCH|
BTREE_ITER_ALL_SNAPSHOTS, k, ({
struct btree_path_level *l = &iter.path->l[0];
struct bkey_packed *_k =
bch2_btree_node_iter_peek(&l->iter, l->b);
- err = flush_buf(i);
- if (err)
- break;
-
- if (!i->size)
+ ret = flush_buf(i);
+ if (ret)
break;
if (bpos_cmp(l->b->key.k.p, i->prev_node) > 0) {
@@ -391,12 +378,12 @@ static ssize_t bch2_read_bfloat_failed(struct file *file, char __user *buf,
}));
i->from = iter.pos;
- if (!err)
- err = flush_buf(i);
-
bch2_trans_exit(&trans);
- return err ?: i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations bfloat_failed_debug_ops = {
@@ -409,7 +396,8 @@ static const struct file_operations bfloat_failed_debug_ops = {
static void bch2_cached_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
struct btree *b)
{
- out->tabstops[0] = 32;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
prt_printf(out, "%px btree=%s l=%u ",
b,
@@ -466,7 +454,7 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
bool done = false;
- int err;
+ ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
@@ -477,12 +465,9 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
struct rhash_head *pos;
struct btree *b;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- break;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
rcu_read_lock();
i->buf.atomic++;
@@ -500,9 +485,12 @@ static ssize_t bch2_cached_btree_nodes_read(struct file *file, char __user *buf,
} while (!done);
if (i->buf.allocation_failure)
- return -ENOMEM;
+ ret = -ENOMEM;
- return i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations cached_btree_nodes_ops = {
@@ -538,7 +526,7 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
struct dump_iter *i = file->private_data;
struct bch_fs *c = i->c;
struct btree_trans *trans;
- int err;
+ ssize_t ret = 0;
i->ubuf = buf;
i->size = size;
@@ -549,12 +537,9 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
if (trans->task->pid <= i->iter)
continue;
- err = flush_buf(i);
- if (err)
- return err;
-
- if (!i->size)
- break;
+ ret = flush_buf(i);
+ if (ret)
+ return ret;
bch2_btree_trans_to_text(&i->buf, trans);
@@ -570,9 +555,12 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
mutex_unlock(&c->btree_trans_lock);
if (i->buf.allocation_failure)
- return -ENOMEM;
+ ret = -ENOMEM;
- return i->ret;
+ if (!ret)
+ ret = flush_buf(i);
+
+ return ret ?: i->ret;
}
static const struct file_operations btree_transactions_ops = {
@@ -651,14 +639,16 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
size_t size, loff_t *ppos)
{
struct dump_iter *i = file->private_data;
- struct lock_held_stats *lhs = &i->c->lock_held_stats;
+ struct bch_fs *c = i->c;
int err;
i->ubuf = buf;
i->size = size;
i->ret = 0;
- while (lhs->names[i->iter] != 0 && i->iter < BCH_LOCK_TIME_NR) {
+ while (1) {
+ struct btree_transaction_stats *s = &c->btree_transaction_stats[i->iter];
+
err = flush_buf(i);
if (err)
return err;
@@ -666,11 +656,37 @@ static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
if (!i->size)
break;
- prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+ if (i->iter == ARRAY_SIZE(c->btree_transaction_fns) ||
+ !c->btree_transaction_fns[i->iter])
+ break;
+
+ prt_printf(&i->buf, "%s: ", c->btree_transaction_fns[i->iter]);
prt_newline(&i->buf);
- printbuf_indent_add(&i->buf, 8);
- bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
- printbuf_indent_sub(&i->buf, 8);
+ printbuf_indent_add(&i->buf, 2);
+
+ mutex_lock(&s->lock);
+
+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+ prt_printf(&i->buf, "Lock hold times:");
+ prt_newline(&i->buf);
+
+ printbuf_indent_add(&i->buf, 2);
+ bch2_time_stats_to_text(&i->buf, &s->lock_hold_times);
+ printbuf_indent_sub(&i->buf, 2);
+ }
+
+ if (s->max_paths_text) {
+ prt_printf(&i->buf, "Maximum allocated btree paths (%u):", s->nr_max_paths);
+ prt_newline(&i->buf);
+
+ printbuf_indent_add(&i->buf, 2);
+ prt_str_indented(&i->buf, s->max_paths_text);
+ printbuf_indent_sub(&i->buf, 2);
+ }
+
+ mutex_unlock(&s->lock);
+
+ printbuf_indent_sub(&i->buf, 2);
prt_newline(&i->buf);
i->iter++;
}
@@ -716,10 +732,8 @@ void bch2_fs_debug_init(struct bch_fs *c)
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
- if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
- debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
- c, &lock_held_stats_op);
- }
+ debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
+ c, &lock_held_stats_op);
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index 15a1be2f..232f7c79 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -37,6 +37,7 @@
x(no_btree_node, no_btree_node_down) \
x(no_btree_node, no_btree_node_init) \
x(no_btree_node, no_btree_node_cached) \
+ x(0, backpointer_to_overwritten_btree_node) \
x(0, lock_fail_node_reused) \
x(0, lock_fail_root_changed) \
x(0, journal_reclaim_would_deadlock) \
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index c93e177a..1a841146 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -290,7 +290,7 @@ err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
goto retry;
- return ret;
+ return ret ?: -BCH_ERR_transaction_restart_nested;
}
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
@@ -914,7 +914,7 @@ static int check_inode(struct btree_trans *trans,
bch2_fs_lazy_rw(c);
ret = fsck_inode_rm(trans, u.bi_inum, iter->pos.snapshot);
- if (ret)
+ if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch_err(c, "error in fsck: error while deleting inode: %s",
bch2_err_str(ret));
return ret;
@@ -1149,13 +1149,11 @@ static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
}
}
fsck_err:
- if (ret) {
+ if (ret)
bch_err(c, "error from check_i_sectors(): %s", bch2_err_str(ret));
- return ret;
- }
- if (trans_was_restarted(trans, restart_count))
- return -BCH_ERR_transaction_restart_nested;
- return 0;
+ if (!ret && trans_was_restarted(trans, restart_count))
+ ret = -BCH_ERR_transaction_restart_nested;
+ return ret;
}
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index d77092aa..3f1cf1ac 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1255,8 +1255,9 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
u64 seq;
unsigned i;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 24);
out->atomic++;
- out->tabstops[0] = 24;
rcu_read_lock();
s = READ_ONCE(j->reservations);
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index 2fc24745..22470067 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -636,6 +636,8 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
b = bch2_backpointer_get_node(&trans, &iter,
bucket, bp_offset, bp);
ret = PTR_ERR_OR_ZERO(b);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ continue;
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index ecc64dd9..17b289b0 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -268,7 +268,8 @@ void bch2_rebalance_work_to_text(struct printbuf *out, struct bch_fs *c)
struct bch_fs_rebalance *r = &c->rebalance;
struct rebalance_work w = rebalance_work(c);
- out->tabstops[0] = 20;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 20);
prt_printf(out, "fullest_dev (%i):", w.dev_most_full_idx);
prt_tab(out);
diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c
index 24244bc3..fb3f8e40 100644
--- a/libbcachefs/subvolume.c
+++ b/libbcachefs/subvolume.c
@@ -278,8 +278,8 @@ int bch2_fs_check_snapshots(struct bch_fs *c)
bch2_trans_init(&trans, c, 0, 0);
- ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_snapshots,
- POS(BCACHEFS_ROOT_INO, 0),
+ ret = for_each_btree_key_commit(&trans, iter,
+ BTREE_ID_snapshots, POS_MIN,
BTREE_ITER_PREFETCH, k,
NULL, NULL, BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL,
check_snapshot(&trans, &iter, k));
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 55f8c65a..ade09bdf 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -1427,8 +1427,8 @@ void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
const struct bch_sb_field_ops *ops = type < BCH_SB_FIELD_NR
? bch2_sb_field_ops[type] : NULL;
- if (!out->tabstops[0])
- out->tabstops[0] = 32;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
if (ops)
prt_printf(out, "%s", bch2_sb_fields[type]);
@@ -1476,8 +1476,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
u64 fields_have = 0;
unsigned nr_devices = 0;
- if (!out->tabstops[0])
- out->tabstops[0] = 32;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 32);
mi = bch2_sb_get_members(sb);
if (mi) {
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 2c650055..2dfed1ff 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -560,7 +560,8 @@ SHOW(bch2_fs_counters)
u64 counter = 0;
u64 counter_since_mount = 0;
- out->tabstops[0] = 32;
+ printbuf_tabstop_push(out, 32);
+
#define x(t, ...) \
if (attr == &sysfs_##t) { \
counter = percpu_u64_get(&c->counters[BCH_COUNTER_##t]);\
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index ee2c7d9e..42da6623 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -268,6 +268,12 @@ static void bch2_quantiles_update(struct quantiles *q, u64 v)
}
}
+void bch2_prt_u64_binary(struct printbuf *out, u64 v, unsigned nr_bits)
+{
+ while (nr_bits)
+ prt_char(out, '0' + ((v >> --nr_bits) & 1));
+}
+
/* time stats: */
static void bch2_time_stats_update_one(struct time_stats *stats,
@@ -526,7 +532,8 @@ void bch2_pd_controller_init(struct bch_pd_controller *pd)
void bch2_pd_controller_debug_to_text(struct printbuf *out, struct bch_pd_controller *pd)
{
- out->tabstops[0] = 20;
+ if (!out->nr_tabstops)
+ printbuf_tabstop_push(out, 20);
prt_printf(out, "rate:");
prt_tab(out);
diff --git a/libbcachefs/util.h b/libbcachefs/util.h
index 1fe66fd9..ab7e43d4 100644
--- a/libbcachefs/util.h
+++ b/libbcachefs/util.h
@@ -353,6 +353,8 @@ bool bch2_is_zero(const void *, size_t);
u64 bch2_read_flag_list(char *, const char * const[]);
+void bch2_prt_u64_binary(struct printbuf *, u64, unsigned);
+
#define NR_QUANTILES 15
#define QUANTILE_IDX(i) inorder_to_eytzinger0(i, NR_QUANTILES)
#define QUANTILE_FIRST eytzinger0_first(NR_QUANTILES)
diff --git a/linux/printbuf.c b/linux/printbuf.c
index 3fc9ff47..5cf79d43 100644
--- a/linux/printbuf.c
+++ b/linux/printbuf.c
@@ -2,20 +2,13 @@
/* Copyright (C) 2022 Kent Overstreet */
#include <linux/err.h>
-#include <linux/math64.h>
-#include <linux/printbuf.h>
-#include <linux/slab.h>
-
-#ifdef __KERNEL__
#include <linux/export.h>
#include <linux/kernel.h>
-#else
-#ifndef EXPORT_SYMBOL
-#define EXPORT_SYMBOL(x)
-#endif
-#endif
+#include <linux/printbuf.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
-static inline size_t printbuf_linelen(struct printbuf *buf)
+static inline unsigned printbuf_linelen(struct printbuf *buf)
{
return buf->pos - buf->last_newline;
}
@@ -35,6 +28,11 @@ int printbuf_make_room(struct printbuf *out, unsigned extra)
return 0;
new_size = roundup_pow_of_two(out->size + extra);
+
+ /*
+ * Note: output buffer must be freeable with kfree(), it's not required
+ * that the user use printbuf_exit().
+ */
buf = krealloc(out->buf, new_size, !out->atomic ? GFP_KERNEL : GFP_NOWAIT);
if (!buf) {
@@ -78,25 +76,43 @@ void printbuf_exit(struct printbuf *buf)
}
EXPORT_SYMBOL(printbuf_exit);
-void prt_newline(struct printbuf *buf)
+void printbuf_tabstops_reset(struct printbuf *buf)
{
- unsigned i;
-
- printbuf_make_room(buf, 1 + buf->indent);
-
- __prt_char(buf, '\n');
+ buf->nr_tabstops = 0;
+}
+EXPORT_SYMBOL(printbuf_tabstops_reset);
- buf->last_newline = buf->pos;
+void printbuf_tabstop_pop(struct printbuf *buf)
+{
+ if (buf->nr_tabstops)
+ --buf->nr_tabstops;
+}
+EXPORT_SYMBOL(printbuf_tabstop_pop);
- for (i = 0; i < buf->indent; i++)
- __prt_char(buf, ' ');
+/*
+ * printbuf_tabstop_push - add a tabstop, n spaces from the previous tabstop
+ *
+ * @buf: printbuf to control
+ * @spaces: number of spaces from previous tabstop
+ *
+ * In the future this function may allocate memory if setting more than
+ * PRINTBUF_INLINE_TABSTOPS or setting tabstops more than 255 spaces from start
+ * of line.
+ */
+int printbuf_tabstop_push(struct printbuf *buf, unsigned spaces)
+{
+ unsigned prev_tabstop = buf->nr_tabstops
+ ? buf->_tabstops[buf->nr_tabstops - 1]
+ : 0;
- printbuf_nul_terminate(buf);
+ if (WARN_ON(buf->nr_tabstops >= ARRAY_SIZE(buf->_tabstops)))
+ return -EINVAL;
- buf->last_field = buf->pos;
- buf->tabstop = 0;
+ buf->_tabstops[buf->nr_tabstops++] = prev_tabstop + spaces;
+ buf->has_indent_or_tabstops = true;
+ return 0;
}
-EXPORT_SYMBOL(prt_newline);
+EXPORT_SYMBOL(printbuf_tabstop_push);
/**
* printbuf_indent_add - add to the current indent level
@@ -113,8 +129,9 @@ void printbuf_indent_add(struct printbuf *buf, unsigned spaces)
spaces = 0;
buf->indent += spaces;
- while (spaces--)
- prt_char(buf, ' ');
+ prt_chars(buf, ' ', spaces);
+
+ buf->has_indent_or_tabstops = true;
}
EXPORT_SYMBOL(printbuf_indent_add);
@@ -137,168 +154,162 @@ void printbuf_indent_sub(struct printbuf *buf, unsigned spaces)
printbuf_nul_terminate(buf);
}
buf->indent -= spaces;
+
+ if (!buf->indent && !buf->nr_tabstops)
+ buf->has_indent_or_tabstops = false;
}
EXPORT_SYMBOL(printbuf_indent_sub);
-/**
- * prt_tab - Advance printbuf to the next tabstop
- *
- * @buf: printbuf to control
- *
- * Advance output to the next tabstop by printing spaces.
+void prt_newline(struct printbuf *buf)
+{
+ unsigned i;
+
+ printbuf_make_room(buf, 1 + buf->indent);
+
+ __prt_char(buf, '\n');
+
+ buf->last_newline = buf->pos;
+
+ for (i = 0; i < buf->indent; i++)
+ __prt_char(buf, ' ');
+
+ printbuf_nul_terminate(buf);
+
+ buf->last_field = buf->pos;
+ buf->cur_tabstop = 0;
+}
+EXPORT_SYMBOL(prt_newline);
+
+/*
+ * Returns spaces from start of line, if set, or 0 if unset:
*/
-void prt_tab(struct printbuf *out)
+static inline unsigned cur_tabstop(struct printbuf *buf)
{
- int spaces = max_t(int, 0, out->tabstops[out->tabstop] - printbuf_linelen(out));
+ return buf->cur_tabstop < buf->nr_tabstops
+ ? buf->_tabstops[buf->cur_tabstop]
+ : 0;
+}
- BUG_ON(out->tabstop > ARRAY_SIZE(out->tabstops));
+static void __prt_tab(struct printbuf *out)
+{
+ int spaces = max_t(int, 0, cur_tabstop(out) - printbuf_linelen(out));
prt_chars(out, ' ', spaces);
out->last_field = out->pos;
- out->tabstop++;
+ out->cur_tabstop++;
}
-EXPORT_SYMBOL(prt_tab);
/**
- * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
- * previous output
+ * prt_tab - Advance printbuf to the next tabstop
*
* @buf: printbuf to control
*
- * Advance output to the next tabstop by inserting spaces immediately after the
- * previous tabstop, right justifying previously outputted text.
+ * Advance output to the next tabstop by printing spaces.
*/
-void prt_tab_rjust(struct printbuf *buf)
+void prt_tab(struct printbuf *out)
{
- BUG_ON(buf->tabstop > ARRAY_SIZE(buf->tabstops));
+ if (WARN_ON(!cur_tabstop(out)))
+ return;
+
+ __prt_tab(out);
+}
+EXPORT_SYMBOL(prt_tab);
- if (printbuf_linelen(buf) < buf->tabstops[buf->tabstop]) {
- unsigned move = buf->pos - buf->last_field;
- unsigned shift = buf->tabstops[buf->tabstop] -
- printbuf_linelen(buf);
+static void __prt_tab_rjust(struct printbuf *buf)
+{
+ unsigned move = buf->pos - buf->last_field;
+ int pad = (int) cur_tabstop(buf) - (int) printbuf_linelen(buf);
- printbuf_make_room(buf, shift);
+ if (pad > 0) {
+ printbuf_make_room(buf, pad);
- if (buf->last_field + shift < buf->size)
- memmove(buf->buf + buf->last_field + shift,
+ if (buf->last_field + pad < buf->size)
+ memmove(buf->buf + buf->last_field + pad,
buf->buf + buf->last_field,
- min(move, buf->size - 1 - buf->last_field - shift));
+ min(move, buf->size - 1 - buf->last_field - pad));
if (buf->last_field < buf->size)
memset(buf->buf + buf->last_field, ' ',
- min(shift, buf->size - buf->last_field));
+ min((unsigned) pad, buf->size - buf->last_field));
- buf->pos += shift;
+ buf->pos += pad;
printbuf_nul_terminate(buf);
}
buf->last_field = buf->pos;
- buf->tabstop++;
+ buf->cur_tabstop++;
}
-EXPORT_SYMBOL(prt_tab_rjust);
-enum string_size_units {
- STRING_UNITS_10, /* use powers of 10^3 (standard SI) */
- STRING_UNITS_2, /* use binary powers of 2^10 */
-};
-static int string_get_size(u64 size, u64 blk_size,
- const enum string_size_units units,
- char *buf, int len)
+/**
+ * prt_tab_rjust - Advance printbuf to the next tabstop, right justifying
+ * previous output
+ *
+ * @buf: printbuf to control
+ *
+ * Advance output to the next tabstop by inserting spaces immediately after the
+ * previous tabstop, right justifying previously outputted text.
+ */
+void prt_tab_rjust(struct printbuf *buf)
{
- static const char *const units_10[] = {
- "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
- };
- static const char *const units_2[] = {
- "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
- };
- static const char *const *const units_str[] = {
- [STRING_UNITS_10] = units_10,
- [STRING_UNITS_2] = units_2,
- };
- static const unsigned int divisor[] = {
- [STRING_UNITS_10] = 1000,
- [STRING_UNITS_2] = 1024,
- };
- static const unsigned int rounding[] = { 500, 50, 5 };
- int i = 0, j;
- u32 remainder = 0, sf_cap;
- char tmp[13];
- const char *unit;
-
- tmp[0] = '\0';
-
- if (blk_size == 0)
- size = 0;
- if (size == 0)
- goto out;
-
- /* This is Napier's algorithm. Reduce the original block size to
- *
- * coefficient * divisor[units]^i
- *
- * we do the reduction so both coefficients are just under 32 bits so
- * that multiplying them together won't overflow 64 bits and we keep
- * as much precision as possible in the numbers.
- *
- * Note: it's safe to throw away the remainders here because all the
- * precision is in the coefficients.
- */
- while (blk_size >> 32) {
- do_div(blk_size, divisor[units]);
- i++;
- }
-
- while (size >> 32) {
- do_div(size, divisor[units]);
- i++;
- }
+ if (WARN_ON(!cur_tabstop(buf)))
+ return;
- /* now perform the actual multiplication keeping i as the sum of the
- * two logarithms */
- size *= blk_size;
-
- /* and logarithmically reduce it until it's just under the divisor */
- while (size >= divisor[units]) {
- remainder = do_div(size, divisor[units]);
- i++;
- }
+ __prt_tab_rjust(buf);
+}
+EXPORT_SYMBOL(prt_tab_rjust);
- /* work out in j how many digits of precision we need from the
- * remainder */
- sf_cap = size;
- for (j = 0; sf_cap*10 < 1000; j++)
- sf_cap *= 10;
-
- if (units == STRING_UNITS_2) {
- /* express the remainder as a decimal. It's currently the
- * numerator of a fraction whose denominator is
- * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
- remainder *= 1000;
- remainder >>= 10;
- }
+/**
+ * prt_bytes_indented - Print an array of chars, handling embedded control characters
+ *
+ * @out: printbuf to output to
+ * @str: string to print
+ * @count: number of bytes to print
+ *
+ * The following control characters are handled as follows:
+ * \n: prt_newline newline that obeys current indent level
+ * \t: prt_tab advance to next tabstop
+ * \r: prt_tab_rjust advance to next tabstop, with right justification
+ */
+void prt_bytes_indented(struct printbuf *out, const char *str, unsigned count)
+{
+ const char *unprinted_start = str;
+ const char *end = str + count;
- /* add a 5 to the digit below what will be printed to ensure
- * an arithmetical round up and carry it through to size */
- remainder += rounding[j];
- if (remainder >= 1000) {
- remainder -= 1000;
- size += 1;
+ if (!out->has_indent_or_tabstops || out->suppress_indent_tabstop_handling) {
+ prt_bytes(out, str, count);
+ return;
}
- if (j) {
- snprintf(tmp, sizeof(tmp), ".%03u", remainder);
- tmp[j+1] = '\0';
+ while (str != end) {
+ switch (*str) {
+ case '\n':
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ prt_newline(out);
+ break;
+ case '\t':
+ if (likely(cur_tabstop(out))) {
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ __prt_tab(out);
+ }
+ break;
+ case '\r':
+ if (likely(cur_tabstop(out))) {
+ prt_bytes(out, unprinted_start, str - unprinted_start);
+ unprinted_start = str + 1;
+ __prt_tab_rjust(out);
+ }
+ break;
+ }
+
+ str++;
}
- out:
- if (i >= ARRAY_SIZE(units_2))
- unit = "UNK";
- else
- unit = units_str[units][i];
-
- return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+ prt_bytes(out, unprinted_start, str - unprinted_start);
}
+EXPORT_SYMBOL(prt_bytes_indented);
/**
* prt_human_readable_u64 - Print out a u64 in human readable units
diff --git a/linux/string_helpers.c b/linux/string_helpers.c
new file mode 100644
index 00000000..3d720bc0
--- /dev/null
+++ b/linux/string_helpers.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ * Copyright (C) 2013, Intel Corporation
+ */
+#include <linux/bug.h>
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/printbuf.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size: The size to be converted in blocks
+ * @blk_size: Size of the block (use 1 for size in bytes)
+ * @units: units to use (powers of 1000 or 1024)
+ * @buf: buffer to format to
+ * @len: length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units. @buf should have room for
+ * at least 9 bytes and will always be zero terminated.
+ *
+ */
+int string_get_size(u64 size, u64 blk_size, const enum string_size_units units,
+ char *buf, int len)
+{
+ static const char *const units_10[] = {
+ "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"
+ };
+ static const char *const units_2[] = {
+ "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"
+ };
+ static const char *const *const units_str[] = {
+ [STRING_UNITS_10] = units_10,
+ [STRING_UNITS_2] = units_2,
+ };
+ static const unsigned int divisor[] = {
+ [STRING_UNITS_10] = 1000,
+ [STRING_UNITS_2] = 1024,
+ };
+ static const unsigned int rounding[] = { 500, 50, 5 };
+ int i = 0, j;
+ u32 remainder = 0, sf_cap;
+ char tmp[8];
+ const char *unit;
+
+ tmp[0] = '\0';
+
+ if (blk_size == 0)
+ size = 0;
+ if (size == 0)
+ goto out;
+
+ /* This is Napier's algorithm. Reduce the original block size to
+ *
+ * coefficient * divisor[units]^i
+ *
+ * we do the reduction so both coefficients are just under 32 bits so
+ * that multiplying them together won't overflow 64 bits and we keep
+ * as much precision as possible in the numbers.
+ *
+ * Note: it's safe to throw away the remainders here because all the
+ * precision is in the coefficients.
+ */
+ while (blk_size >> 32) {
+ do_div(blk_size, divisor[units]);
+ i++;
+ }
+
+ while (size >> 32) {
+ do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* now perform the actual multiplication keeping i as the sum of the
+ * two logarithms */
+ size *= blk_size;
+
+ /* and logarithmically reduce it until it's just under the divisor */
+ while (size >= divisor[units]) {
+ remainder = do_div(size, divisor[units]);
+ i++;
+ }
+
+ /* work out in j how many digits of precision we need from the
+ * remainder */
+ sf_cap = size;
+ for (j = 0; sf_cap*10 < 1000; j++)
+ sf_cap *= 10;
+
+ if (units == STRING_UNITS_2) {
+ /* express the remainder as a decimal. It's currently the
+ * numerator of a fraction whose denominator is
+ * divisor[units], which is 1 << 10 for STRING_UNITS_2 */
+ remainder *= 1000;
+ remainder >>= 10;
+ }
+
+ /* add a 5 to the digit below what will be printed to ensure
+ * an arithmetical round up and carry it through to size */
+ remainder += rounding[j];
+ if (remainder >= 1000) {
+ remainder -= 1000;
+ size += 1;
+ }
+
+ if (j) {
+ snprintf(tmp, sizeof(tmp), ".%03u", remainder);
+ tmp[j+1] = '\0';
+ }
+
+ out:
+ if (i >= ARRAY_SIZE(units_2))
+ unit = "UNK";
+ else
+ unit = units_str[units][i];
+
+ return snprintf(buf, len, "%u%s %s", (u32)size, tmp, unit);
+}
+EXPORT_SYMBOL(string_get_size);