/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_ITER_H
#define _BCACHEFS_BTREE_ITER_H

#include "bset.h"
#include "btree_types.h"

#include <trace/events/bcachefs.h>

static inline void __btree_path_get(struct btree_path *path, bool intent)
{
	path->ref++;
	path->intent_ref += intent;
}

static inline bool __btree_path_put(struct btree_path *path, bool intent)
{
	EBUG_ON(!path->ref);
	EBUG_ON(!path->intent_ref && intent);
	path->intent_ref -= intent;
	return --path->ref == 0;
}

static inline void btree_path_set_dirty(struct btree_path *path,
					enum btree_path_uptodate u)
{
	path->uptodate = max_t(unsigned, path->uptodate, u);
}

static inline struct btree *btree_path_node(struct btree_path *path,
					    unsigned level)
{
	return level < BTREE_MAX_DEPTH ? path->l[level].b : NULL;
}

static inline bool btree_node_lock_seq_matches(const struct btree_path *path,
					const struct btree *b, unsigned level)
{
	/*
	 * We don't compare the low bits of the lock sequence numbers because
	 * @path might have taken a write lock on @b, and we don't want to skip
	 * the linked path if the sequence numbers were equal before taking that
	 * write lock. The lock sequence number is incremented by taking and
	 * releasing write locks and is even when unlocked:
	 */
	return path->l[level].lock_seq >> 1 == b->c.lock.state.seq >> 1;
}
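
/*
 * Worked example (illustrative numbers, following the rule above): if
 * @path cached the node when lock_seq was 4 (unlocked), and a write
 * lock is then taken (seq 5), 4 >> 1 == 5 >> 1 and we still match -
 * possibly @path itself holds that write lock. Once the write lock is
 * released (seq 6), 4 >> 1 != 6 >> 1: the node was modified and the
 * cached pointer is stale.
 */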

static inline struct btree *btree_node_parent(struct btree_path *path,
					      struct btree *b)
{
	return btree_path_node(path, b->c.level + 1);
}

/* Iterate over paths within a transaction: */

static inline struct btree_path *
__trans_next_path(struct btree_trans *trans, unsigned idx)
{
	u64 l;

	if (idx == BTREE_ITER_MAX)
		return NULL;

	l = trans->paths_allocated >> idx;
	if (!l)
		return NULL;

	idx += __ffs64(l);
	EBUG_ON(idx >= BTREE_ITER_MAX);
	EBUG_ON(trans->paths[idx].idx != idx);
	return &trans->paths[idx];
}
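
/*
 * Example (hypothetical values): with trans->paths_allocated == 0b1011
 * and idx == 2, the shift leaves l == 0b10, __ffs64(l) == 1, and we
 * return &trans->paths[3]: the first allocated path at or after idx.
 */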

void bch2_btree_path_check_sort(struct btree_trans *, struct btree_path *, int);

#define trans_for_each_path_from(_trans, _path, _start)			\
	for (_path = __trans_next_path((_trans), _start);		\
	     (_path);							\
	     _path = __trans_next_path((_trans), (_path)->idx + 1))

#define trans_for_each_path(_trans, _path)				\
	trans_for_each_path_from(_trans, _path, 0)

static inline struct btree_path *next_btree_path(struct btree_trans *trans, struct btree_path *path)
{
	unsigned idx = path ? path->sorted_idx + 1 : 0;

	EBUG_ON(idx > trans->nr_sorted);

	return idx < trans->nr_sorted
		? trans->paths + trans->sorted[idx]
		: NULL;
}

static inline struct btree_path *prev_btree_path(struct btree_trans *trans, struct btree_path *path)
{
	EBUG_ON(path->sorted_idx >= trans->nr_sorted);
	return path->sorted_idx
		? trans->paths + trans->sorted[path->sorted_idx - 1]
		: NULL;
}

#define trans_for_each_path_inorder(_trans, _path, _i)			\
	for (_i = 0;							\
	     (_i) < (_trans)->nr_sorted &&				\
	     ((_path) = (_trans)->paths + (_trans)->sorted[_i], true);	\
	     _i++)
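
/*
 * Usage sketch (illustrative, not a caller from this tree): iterate
 * paths in sorted (key) order rather than allocation order:
 *
 *	struct btree_path *path;
 *	unsigned i;
 *
 *	trans_for_each_path_inorder(trans, path, i)
 *		pr_info("path %u: btree %u level %u\n",
 *			path->idx, path->btree_id, path->level);
 */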

static inline bool __path_has_node(const struct btree_path *path,
				   const struct btree *b)
{
	return path->l[b->c.level].b == b &&
		btree_node_lock_seq_matches(path, b, b->c.level);
}

static inline struct btree_path *
__trans_next_path_with_node(struct btree_trans *trans, struct btree *b,
			    unsigned idx)
{
	struct btree_path *path = __trans_next_path(trans, idx);

	while (path && !__path_has_node(path, b))
		path = __trans_next_path(trans, path->idx + 1);

	return path;
}

#define trans_for_each_path_with_node(_trans, _b, _path)		\
	for (_path = __trans_next_path_with_node((_trans), (_b), 0);	\
	     (_path);							\
	     _path = __trans_next_path_with_node((_trans), (_b),	\
						 (_path)->idx + 1))

struct btree_path * __must_check
bch2_btree_path_make_mut(struct btree_trans *, struct btree_path *,
			 bool, unsigned long);
struct btree_path * __must_check
bch2_btree_path_set_pos(struct btree_trans *, struct btree_path *,
			struct bpos, bool, unsigned long);
int __must_check bch2_btree_path_traverse(struct btree_trans *,
					  struct btree_path *, unsigned);
struct btree_path *bch2_path_get(struct btree_trans *, enum btree_id, struct bpos,
				 unsigned, unsigned, unsigned, unsigned long);
inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *, struct bkey *);

struct bkey_i *bch2_btree_journal_peek_slot(struct btree_trans *,
					struct btree_iter *, struct bpos);

inline void bch2_btree_path_level_init(struct btree_trans *,
				       struct btree_path *, struct btree *);

#ifdef CONFIG_BCACHEFS_DEBUG
void bch2_trans_verify_paths(struct btree_trans *);
void bch2_assert_pos_locked(struct btree_trans *, enum btree_id,
			    struct bpos, bool);
#else
static inline void bch2_trans_verify_paths(struct btree_trans *trans) {}
static inline void bch2_assert_pos_locked(struct btree_trans *trans, enum btree_id id,
					  struct bpos pos, bool key_cache) {}
#endif

void bch2_btree_path_fix_key_modified(struct btree_trans *trans,
				      struct btree *, struct bkey_packed *);
void bch2_btree_node_iter_fix(struct btree_trans *trans, struct btree_path *,
			      struct btree *, struct btree_node_iter *,
			      struct bkey_packed *, unsigned, unsigned);

int bch2_btree_path_relock_intent(struct btree_trans *, struct btree_path *);

void bch2_path_put(struct btree_trans *, struct btree_path *, bool);

int bch2_trans_relock(struct btree_trans *);
void bch2_trans_unlock(struct btree_trans *);
bool bch2_trans_locked(struct btree_trans *);

static inline bool trans_was_restarted(struct btree_trans *trans, u32 restart_count)
{
	return restart_count != trans->restart_count;
}

void bch2_trans_verify_not_restarted(struct btree_trans *, u32);

__always_inline
static inline int btree_trans_restart_nounlock(struct btree_trans *trans, int err)
{
	BUG_ON(err <= 0);
	BUG_ON(!bch2_err_matches(err, BCH_ERR_transaction_restart));

	trans->restarted = err;
	return -err;
}

__always_inline
static inline int btree_trans_restart(struct btree_trans *trans, int err)
{
	return btree_trans_restart_nounlock(trans, err);
}

bool bch2_btree_node_upgrade(struct btree_trans *,
			     struct btree_path *, unsigned);

void __bch2_btree_path_downgrade(struct btree_trans *, struct btree_path *, unsigned);

static inline void bch2_btree_path_downgrade(struct btree_trans *trans,
					     struct btree_path *path)
{
	unsigned new_locks_want = path->level + !!path->intent_ref;

	if (path->locks_want > new_locks_want)
		__bch2_btree_path_downgrade(trans, path, new_locks_want);
}

void bch2_trans_downgrade(struct btree_trans *);

void bch2_trans_node_add(struct btree_trans *trans, struct btree *);
void bch2_trans_node_reinit_iter(struct btree_trans *, struct btree *);

int __must_check __bch2_btree_iter_traverse(struct btree_iter *iter);
int __must_check bch2_btree_iter_traverse(struct btree_iter *);

struct btree *bch2_btree_iter_peek_node(struct btree_iter *);
struct btree *bch2_btree_iter_next_node(struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *);

static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
{
	return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
}

struct bkey_s_c bch2_btree_iter_peek_prev(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev(struct btree_iter *);

struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_next_slot(struct btree_iter *);
struct bkey_s_c bch2_btree_iter_prev_slot(struct btree_iter *);

bool bch2_btree_iter_advance(struct btree_iter *);
bool bch2_btree_iter_rewind(struct btree_iter *);

static inline void __bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
	iter->k.type = KEY_TYPE_deleted;
	iter->k.p.inode		= iter->pos.inode	= new_pos.inode;
	iter->k.p.offset	= iter->pos.offset	= new_pos.offset;
	iter->k.p.snapshot	= iter->pos.snapshot	= new_pos.snapshot;
	iter->k.size = 0;
}

static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
	if (unlikely(iter->update_path))
		bch2_path_put(iter->trans, iter->update_path,
			      iter->flags & BTREE_ITER_INTENT);
	iter->update_path = NULL;

	if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
		new_pos.snapshot = iter->snapshot;

	__bch2_btree_iter_set_pos(iter, new_pos);
}

static inline void bch2_btree_iter_set_pos_to_extent_start(struct btree_iter *iter)
{
	BUG_ON(!(iter->flags & BTREE_ITER_IS_EXTENTS));
	iter->pos = bkey_start_pos(&iter->k);
}

static inline void bch2_btree_iter_set_snapshot(struct btree_iter *iter, u32 snapshot)
{
	struct bpos pos = iter->pos;

	iter->snapshot = snapshot;
	pos.snapshot = snapshot;
	bch2_btree_iter_set_pos(iter, pos);
}

void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
			  unsigned, struct bpos, unsigned);
void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
			       enum btree_id, struct bpos,
			       unsigned, unsigned, unsigned);
void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);

static inline void set_btree_iter_dontneed(struct btree_iter *iter)
{
	if (!iter->trans->restarted)
		iter->path->preserve = false;
}

void *__bch2_trans_kmalloc(struct btree_trans *, size_t);

static inline void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
{
	unsigned new_top = trans->mem_top + size;
	void *p = trans->mem + trans->mem_top;

	if (likely(new_top <= trans->mem_bytes)) {
		trans->mem_top += size;
		memset(p, 0, size);
		return p;
	} else {
		return __bch2_trans_kmalloc(trans, size);
	}
}
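
/*
 * Usage sketch: bch2_trans_kmalloc() is a bump allocator; memory is
 * zeroed, may be an ERR_PTR() on failure, and lives until the
 * transaction is reset, so there is no matching free. Copying a key is
 * typical (the variable names here are assumed):
 *
 *	struct bkey_i *n = bch2_trans_kmalloc(trans, bkey_bytes(k.k));
 *	int ret = PTR_ERR_OR_ZERO(n);
 *	if (ret)
 *		return ret;
 *	bkey_reassemble(n, k);
 */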

u32 bch2_trans_begin(struct btree_trans *);

static inline struct btree *
__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
{
	struct btree *b;

	while (b = bch2_btree_iter_peek_node(iter),
	       bch2_err_matches(PTR_ERR_OR_ZERO(b), BCH_ERR_transaction_restart))
		bch2_trans_begin(trans);

	return b;
}

#define __for_each_btree_node(_trans, _iter, _btree_id, _start,		\
			      _locks_want, _depth, _flags, _b, _ret)	\
	for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id),	\
				_start, _locks_want, _depth, _flags);	\
	     (_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\
	     !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b);			\
	     (_b) = bch2_btree_iter_next_node(&(_iter)))

#define for_each_btree_node(_trans, _iter, _btree_id, _start,		\
			    _flags, _b, _ret)				\
	__for_each_btree_node(_trans, _iter, _btree_id, _start,		\
			      0, 0, _flags, _b, _ret)
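
/*
 * Usage sketch (illustrative; @out is an assumed printbuf): walk every
 * node of a btree, e.g. for debug output:
 *
 *	struct btree_iter iter;
 *	struct btree *b;
 *	int ret;
 *
 *	for_each_btree_node(trans, iter, BTREE_ID_extents, POS_MIN,
 *			    0, b, ret)
 *		bch2_btree_node_to_text(out, trans->c, b);
 *	bch2_trans_iter_exit(trans, &iter);
 */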

static inline int bkey_err(struct bkey_s_c k)
{
	return PTR_ERR_OR_ZERO(k.k);
}

static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter,
							     unsigned flags)
{
	BUG_ON(flags & BTREE_ITER_ALL_LEVELS);

	return  flags & BTREE_ITER_SLOTS      ? bch2_btree_iter_peek_slot(iter) :
						bch2_btree_iter_peek_prev(iter);
}

static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
							unsigned flags)
{
	return  flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) :
		flags & BTREE_ITER_SLOTS      ? bch2_btree_iter_peek_slot(iter) :
						bch2_btree_iter_peek(iter);
}

static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
							     struct bpos end,
							     unsigned flags)
{
	if (!(flags & BTREE_ITER_SLOTS))
		return bch2_btree_iter_peek_upto(iter, end);

	if (bkey_cmp(iter->pos, end) > 0)
		return bkey_s_c_null;

	return bch2_btree_iter_peek_slot(iter);
}

static inline int btree_trans_too_many_iters(struct btree_trans *trans)
{
	if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) {
		trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_);
		return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters);
	}

	return 0;
}

static inline struct bkey_s_c
__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
				   struct btree_iter *iter, unsigned flags)
{
	struct bkey_s_c k;

	while (btree_trans_too_many_iters(trans) ||
	       (k = bch2_btree_iter_peek_type(iter, flags),
		bch2_err_matches(bkey_err(k), BCH_ERR_transaction_restart)))
		bch2_trans_begin(trans);

	return k;
}

#define lockrestart_do(_trans, _do)					\
({									\
	u32 _restart_count;						\
	int _ret;							\
									\
	do {								\
		_restart_count = bch2_trans_begin(_trans);		\
		_ret = (_do);						\
	} while (bch2_err_matches(_ret, BCH_ERR_transaction_restart));	\
									\
	if (!_ret)							\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
									\
	_ret;								\
})
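
/*
 * Usage sketch: wrap a transactional operation so restarts are retried
 * transparently (bch2_some_op() is a hypothetical stand-in):
 *
 *	ret = lockrestart_do(trans, bch2_some_op(trans, args));
 */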

/*
 * nested_lockrestart_do(), nested_commit_do():
 *
 * These are like lockrestart_do() and commit_do(), with two differences:
 *
 *  - We don't call bch2_trans_begin() unless we had a transaction restart
 *  - We return -BCH_ERR_transaction_restart_nested if we succeeded after a
 *  transaction restart
 */
#define nested_lockrestart_do(_trans, _do)				\
({									\
	u32 _restart_count, _orig_restart_count;			\
	int _ret;							\
									\
	_restart_count = _orig_restart_count = (_trans)->restart_count;	\
									\
	while (bch2_err_matches(_ret = (_do), BCH_ERR_transaction_restart))\
		_restart_count = bch2_trans_begin(_trans);		\
									\
	if (!_ret)							\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
									\
	if (!_ret && trans_was_restarted(_trans, _orig_restart_count))	\
		_ret = -BCH_ERR_transaction_restart_nested;		\
									\
	_ret;								\
})

#define for_each_btree_key2(_trans, _iter, _btree_id,			\
			    _start, _flags, _k, _do)			\
({									\
	int _ret = 0;							\
									\
	bch2_trans_iter_init((_trans), &(_iter), (_btree_id),		\
			     (_start), (_flags));			\
									\
	while (1) {							\
		u32 _restart_count = bch2_trans_begin(_trans);		\
		(_k) = bch2_btree_iter_peek_type(&(_iter), (_flags));	\
		if (!(_k).k) {						\
			_ret = 0;					\
			break;						\
		}							\
									\
		_ret = bkey_err(_k) ?: (_do);				\
		if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
			continue;					\
		if (_ret)						\
			break;						\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
		if (!bch2_btree_iter_advance(&(_iter)))			\
			break;						\
	}								\
									\
	bch2_trans_iter_exit((_trans), &(_iter));			\
	_ret;								\
})
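
/*
 * Usage sketch (illustrative; @sectors and @inum are assumed locals):
 * _do runs with @k valid and may itself return a transaction restart,
 * which the macro handles by re-peeking at the same position:
 *
 *	ret = for_each_btree_key2(trans, iter, BTREE_ID_extents,
 *			POS(inum, 0), BTREE_ITER_PREFETCH, k, ({
 *		sectors += k.k->size;
 *		0;
 *	}));
 */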

#define for_each_btree_key_reverse(_trans, _iter, _btree_id,		\
				   _start, _flags, _k, _do)		\
({									\
	int _ret = 0;							\
									\
	bch2_trans_iter_init((_trans), &(_iter), (_btree_id),		\
			     (_start), (_flags));			\
									\
	while (1) {							\
		u32 _restart_count = bch2_trans_begin(_trans);		\
		(_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags));\
		if (!(_k).k) {						\
			_ret = 0;					\
			break;						\
		}							\
									\
		_ret = bkey_err(_k) ?: (_do);				\
		if (bch2_err_matches(_ret, BCH_ERR_transaction_restart))\
			continue;					\
		if (_ret)						\
			break;						\
		bch2_trans_verify_not_restarted(_trans, _restart_count);\
		if (!bch2_btree_iter_rewind(&(_iter)))			\
			break;						\
	}								\
									\
	bch2_trans_iter_exit((_trans), &(_iter));			\
	_ret;								\
})

#define for_each_btree_key_commit(_trans, _iter, _btree_id,		\
				  _start, _iter_flags, _k,		\
				  _disk_res, _journal_seq, _commit_flags,\
				  _do)					\
	for_each_btree_key2(_trans, _iter, _btree_id, _start, _iter_flags, _k,\
			    (_do) ?: bch2_trans_commit(_trans, (_disk_res),\
					(_journal_seq), (_commit_flags)))
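
/*
 * Usage sketch (fix_inode() is a hypothetical helper returning int):
 * run an update per key, committing after each one:
 *
 *	ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
 *			POS_MIN, BTREE_ITER_INTENT, k,
 *			NULL, NULL, BTREE_INSERT_NOFAIL,
 *		fix_inode(trans, &iter, k));
 */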

#define for_each_btree_key(_trans, _iter, _btree_id,			\
			   _start, _flags, _k, _ret)			\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))
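
/*
 * Usage sketch (nr_dirents is an assumed local): the basic iteration
 * pattern; on transaction restart the peek helper re-begins the
 * transaction, so the loop body only sees valid keys:
 *
 *	for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
 *			   0, k, ret)
 *		if (k.k->type == KEY_TYPE_dirent)
 *			nr_dirents++;
 *	bch2_trans_iter_exit(trans, &iter);
 */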

#define for_each_btree_key_norestart(_trans, _iter, _btree_id,		\
			   _start, _flags, _k, _ret)			\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = bch2_btree_iter_peek_type(&(_iter), _flags),	\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))

#define for_each_btree_key_upto_norestart(_trans, _iter, _btree_id,	\
			   _start, _end, _flags, _k, _ret)		\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = bch2_btree_iter_peek_upto_type(&(_iter), _end, _flags),\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))

#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret)	\
	for (;								\
	     (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))

#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret)	\
	for (;								\
	     (_k) = bch2_btree_iter_peek_type(&(_iter), _flags),	\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_advance(&(_iter)))

/* new multiple iterator interface: */

void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *);
void bch2_btree_path_to_text(struct printbuf *, struct btree_path *);
void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *);
void bch2_dump_trans_updates(struct btree_trans *);
void bch2_dump_trans_paths_updates(struct btree_trans *);
void __bch2_trans_init(struct btree_trans *, struct bch_fs *, const char *);
void bch2_trans_exit(struct btree_trans *);

#define bch2_trans_init(_trans, _c, _nr_iters, _mem) __bch2_trans_init(_trans, _c, __func__)

void bch2_btree_trans_to_text(struct printbuf *, struct btree_trans *);

void bch2_fs_btree_iter_exit(struct bch_fs *);
int bch2_fs_btree_iter_init(struct bch_fs *);

#endif /* _BCACHEFS_BTREE_ITER_H */