Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r--   libbcachefs/extents.c | 539
1 file changed, 251 insertions(+), 288 deletions(-)
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index 7d2f5ccb..6e79f491 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -19,6 +19,7 @@ #include "inode.h" #include "journal.h" #include "super-io.h" +#include "util.h" #include "xattr.h" #include <trace/events/bcachefs.h> @@ -155,6 +156,44 @@ unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k) return nr_ptrs; } +unsigned bch2_extent_is_compressed(struct bkey_s_c k) +{ + struct bkey_s_c_extent e; + const struct bch_extent_ptr *ptr; + struct bch_extent_crc_unpacked crc; + unsigned ret = 0; + + switch (k.k->type) { + case BCH_EXTENT: + case BCH_EXTENT_CACHED: + e = bkey_s_c_to_extent(k); + + extent_for_each_ptr_crc(e, ptr, crc) + if (!ptr->cached && + crc.compression_type != BCH_COMPRESSION_NONE && + crc.compressed_size < crc.live_size) + ret = max_t(unsigned, ret, crc.compressed_size); + } + + return ret; +} + +bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, + struct bch_extent_ptr m, u64 offset) +{ + const struct bch_extent_ptr *ptr; + struct bch_extent_crc_unpacked crc; + + extent_for_each_ptr_crc(e, ptr, crc) + if (ptr->dev == m.dev && + ptr->gen == m.gen && + (s64) ptr->offset + crc.offset - bkey_start_offset(e.k) == + (s64) m.offset - offset) + return ptr; + + return NULL; +} + /* Doesn't cleanup redundant crcs */ void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr) { @@ -186,24 +225,30 @@ found: bch2_extent_drop_ptr(e, ptr); } -/* returns true if equal */ -static bool crc_cmp(union bch_extent_crc *l, union bch_extent_crc *r) +static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u, + struct bch_extent_crc_unpacked n) { - return extent_crc_type(l) == extent_crc_type(r) && - !memcmp(l, r, extent_entry_bytes(to_entry(l))); + return !u.compression_type && + u.csum_type && + u.uncompressed_size > u.live_size && + bch2_csum_type_is_encryption(u.csum_type) == + bch2_csum_type_is_encryption(n.csum_type); } -/* Increment pointers after @crc by crc's offset until the next crc entry: */ -void bch2_extent_crc_narrow_pointers(struct bkey_s_extent e, union bch_extent_crc *crc) +bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent e, + struct bch_extent_crc_unpacked n) { - union bch_extent_entry *entry; + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; - extent_for_each_entry_from(e, entry, extent_entry_next(to_entry(crc))) { - if (!extent_entry_is_ptr(entry)) - return; + if (!n.csum_type) + return false; - entry->ptr.offset += crc_offset(crc); - } + extent_for_each_crc(e, crc, i) + if (can_narrow_crc(crc, n)) + return true; + + return false; } /* @@ -214,96 +259,50 @@ void bch2_extent_crc_narrow_pointers(struct bkey_s_extent e, union bch_extent_cr * not compressed, we can modify them to point to only the data that is * currently live (so that readers won't have to bounce) while we've got the * checksum we need: - * - * XXX: to guard against data being corrupted while in memory, instead of - * recomputing the checksum here, it would be better in the read path to instead - * of computing the checksum of the entire extent: - * - * | extent | - * - * compute the checksums of the live and dead data separately - * | dead data || live data || dead data | - * - * and then verify that crc_dead1 + crc_live + crc_dead2 == orig_crc, and then - * use crc_live here (that we verified was correct earlier) - * - * note: doesn't work with encryption */ -void bch2_extent_narrow_crcs(struct bkey_s_extent e) +bool bch2_extent_narrow_crcs(struct 
bkey_i_extent *e, + struct bch_extent_crc_unpacked n) { - union bch_extent_crc *crc; - bool have_wide = false, have_narrow = false; - struct bch_csum csum = { 0 }; - unsigned csum_type = 0; - - extent_for_each_crc(e, crc) { - if (crc_compression_type(crc) || - bch2_csum_type_is_encryption(crc_csum_type(crc))) - continue; - - if (crc_uncompressed_size(e.k, crc) != e.k->size) { - have_wide = true; - } else { - have_narrow = true; - csum = crc_csum(crc); - csum_type = crc_csum_type(crc); - } - } - - if (!have_wide || !have_narrow) - return; - - extent_for_each_crc(e, crc) { - if (crc_compression_type(crc)) - continue; - - if (crc_uncompressed_size(e.k, crc) != e.k->size) { - switch (extent_crc_type(crc)) { - case BCH_EXTENT_CRC_NONE: - BUG(); - case BCH_EXTENT_CRC32: - if (bch_crc_bytes[csum_type] > 4) - continue; - - bch2_extent_crc_narrow_pointers(e, crc); - crc->crc32._compressed_size = e.k->size - 1; - crc->crc32._uncompressed_size = e.k->size - 1; - crc->crc32.offset = 0; - crc->crc32.csum_type = csum_type; - crc->crc32.csum = csum.lo; + struct bch_extent_crc_unpacked u; + struct bch_extent_ptr *ptr; + union bch_extent_entry *i; + + /* Find a checksum entry that covers only live data: */ + if (!n.csum_type) + extent_for_each_crc(extent_i_to_s(e), u, i) + if (!u.compression_type && + u.csum_type && + u.live_size == u.uncompressed_size) { + n = u; break; - case BCH_EXTENT_CRC64: - if (bch_crc_bytes[csum_type] > 10) - continue; + } - bch2_extent_crc_narrow_pointers(e, crc); - crc->crc64._compressed_size = e.k->size - 1; - crc->crc64._uncompressed_size = e.k->size - 1; - crc->crc64.offset = 0; - crc->crc64.csum_type = csum_type; - crc->crc64.csum_lo = csum.lo; - crc->crc64.csum_hi = csum.hi; - break; - case BCH_EXTENT_CRC128: - if (bch_crc_bytes[csum_type] > 16) - continue; + if (!bch2_can_narrow_extent_crcs(extent_i_to_s_c(e), n)) + return false; - bch2_extent_crc_narrow_pointers(e, crc); - crc->crc128._compressed_size = e.k->size - 1; - crc->crc128._uncompressed_size = e.k->size - 1; - crc->crc128.offset = 0; - crc->crc128.csum_type = csum_type; - crc->crc128.csum = csum; - break; - } + BUG_ON(n.compression_type); + BUG_ON(n.offset); + BUG_ON(n.live_size != e->k.size); + + bch2_extent_crc_append(e, n); +restart_narrow_pointers: + extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u) + if (can_narrow_crc(u, n)) { + ptr->offset += u.offset; + extent_ptr_append(e, *ptr); + __bch2_extent_drop_ptr(extent_i_to_s(e), ptr); + goto restart_narrow_pointers; } - } + + bch2_extent_drop_redundant_crcs(extent_i_to_s(e)); + return true; } void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e) { union bch_extent_entry *entry = e.v->start; union bch_extent_crc *crc, *prev = NULL; + struct bch_extent_crc_unpacked u, prev_u; while (entry != extent_entry_last(e)) { union bch_extent_entry *next = extent_entry_next(entry); @@ -313,6 +312,7 @@ void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e) goto next; crc = entry_to_crc(entry); + u = bch2_extent_crc_unpack(e.k, crc); if (next == extent_entry_last(e)) { /* crc entry with no pointers after it: */ @@ -324,20 +324,28 @@ void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e) goto drop; } - if (prev && crc_cmp(crc, prev)) { + if (prev && !memcmp(&u, &prev_u, sizeof(u))) { /* identical to previous crc entry: */ goto drop; } if (!prev && - !crc_csum_type(crc) && - !crc_compression_type(crc)) { + !u.csum_type && + !u.compression_type) { /* null crc entry: */ - bch2_extent_crc_narrow_pointers(e, crc); + union bch_extent_entry *e2; + + 
extent_for_each_entry_from(e, e2, extent_entry_next(entry)) { + if (!extent_entry_is_ptr(e2)) + break; + + e2->ptr.offset += u.offset; + } goto drop; } prev = crc; + prev_u = u; next: entry = next; continue; @@ -453,7 +461,7 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, { char *out = buf, *end = buf + size; const union bch_extent_entry *entry; - const union bch_extent_crc *crc; + struct bch_extent_crc_unpacked crc; const struct bch_extent_ptr *ptr; struct bch_dev *ca; bool first = true; @@ -468,13 +476,14 @@ static size_t extent_print_ptrs(struct bch_fs *c, char *buf, case BCH_EXTENT_ENTRY_crc32: case BCH_EXTENT_ENTRY_crc64: case BCH_EXTENT_ENTRY_crc128: - crc = entry_to_crc(entry); - - p("crc: c_size %u size %u offset %u csum %u compress %u", - crc_compressed_size(e.k, crc), - crc_uncompressed_size(e.k, crc), - crc_offset(crc), crc_csum_type(crc), - crc_compression_type(crc)); + crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); + + p("crc: c_size %u size %u offset %u nonce %u csum %u compress %u", + crc.compressed_size, + crc.uncompressed_size, + crc.offset, crc.nonce, + crc.csum_type, + crc.compression_type); break; case BCH_EXTENT_ENTRY_ptr: ptr = entry_to_ptr(entry); @@ -499,13 +508,24 @@ out: return out - buf; } +static inline bool dev_latency_better(struct bch_dev *dev1, + struct bch_dev *dev2) +{ + unsigned l1 = atomic_read(&dev1->latency[READ]); + unsigned l2 = atomic_read(&dev2->latency[READ]); + + /* Pick at random, biased in favor of the faster device: */ + + return bch2_rand_range(l1 + l2) > l1; +} + static void extent_pick_read_device(struct bch_fs *c, struct bkey_s_c_extent e, struct bch_devs_mask *avoid, struct extent_pick_ptr *pick) { - const union bch_extent_crc *crc; const struct bch_extent_ptr *ptr; + struct bch_extent_crc_unpacked crc; extent_for_each_ptr_crc(e, ptr, crc) { struct bch_dev *ca = c->devs[ptr->dev]; @@ -516,12 +536,18 @@ static void extent_pick_read_device(struct bch_fs *c, if (ca->mi.state == BCH_MEMBER_STATE_FAILED) continue; - if (avoid && test_bit(ca->dev_idx, avoid->d)) - continue; + if (avoid) { + if (test_bit(ca->dev_idx, avoid->d)) + continue; - if (pick->ca && pick->ca->mi.tier < ca->mi.tier) - continue; + if (pick->ca && + test_bit(pick->ca->dev_idx, avoid->d)) + goto use; + } + if (pick->ca && !dev_latency_better(ca, pick->ca)) + continue; +use: if (!percpu_ref_tryget(&ca->io_ref)) continue; @@ -530,11 +556,9 @@ static void extent_pick_read_device(struct bch_fs *c, *pick = (struct extent_pick_ptr) { .ptr = *ptr, + .crc = crc, .ca = ca, }; - - if (e.k->size) - pick->crc = crc_to_128(e.k, crc); } } @@ -557,14 +581,17 @@ static const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; const struct bch_extent_ptr *ptr; - const union bch_extent_crc *crc; const char *reason; - extent_for_each_entry(e, entry) + extent_for_each_entry(e, entry) { if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) return "invalid extent entry type"; - extent_for_each_ptr_crc(e, ptr, crc) { + if (extent_entry_is_crc(entry)) + return "has crc field"; + } + + extent_for_each_ptr(e, ptr) { reason = extent_ptr_invalid(c, e, ptr, c->opts.btree_node_size, true); @@ -572,9 +599,6 @@ static const char *bch2_btree_ptr_invalid(const struct bch_fs *c, return reason; } - if (crc) - return "has crc field"; - return NULL; } @@ -699,28 +723,28 @@ static bool __bch2_cut_front(struct bpos where, struct bkey_s k) __set_bkey_deleted(k.k); else if (bkey_extent_is_data(k.k)) { 
struct bkey_s_extent e = bkey_s_to_extent(k); - struct bch_extent_ptr *ptr; - union bch_extent_crc *crc, *prev_crc = NULL; + union bch_extent_entry *entry; + bool seen_crc = false; - extent_for_each_ptr_crc(e, ptr, crc) { - switch (extent_crc_type(crc)) { - case BCH_EXTENT_CRC_NONE: - ptr->offset += e.k->size - len; + extent_for_each_entry(e, entry) { + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + if (!seen_crc) + entry->ptr.offset += e.k->size - len; break; - case BCH_EXTENT_CRC32: - if (prev_crc != crc) - crc->crc32.offset += e.k->size - len; + case BCH_EXTENT_ENTRY_crc32: + entry->crc32.offset += e.k->size - len; break; - case BCH_EXTENT_CRC64: - if (prev_crc != crc) - crc->crc64.offset += e.k->size - len; + case BCH_EXTENT_ENTRY_crc64: + entry->crc64.offset += e.k->size - len; break; - case BCH_EXTENT_CRC128: - if (prev_crc != crc) - crc->crc128.offset += e.k->size - len; + case BCH_EXTENT_ENTRY_crc128: + entry->crc128.offset += e.k->size - len; break; } - prev_crc = crc; + + if (extent_entry_is_crc(entry)) + seen_crc = true; } } @@ -989,7 +1013,7 @@ static void bch2_add_sectors(struct extent_insert_state *s, return; bch2_mark_key(c, k, sectors, false, gc_pos_btree_node(b), - &s->stats, s->trans->journal_res.seq); + &s->stats, s->trans->journal_res.seq, 0); } static void bch2_subtract_sectors(struct extent_insert_state *s, @@ -1123,7 +1147,7 @@ static void extent_insert_committed(struct extent_insert_state *s) if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY) && bkey_cmp(s->committed, insert->k.p) && - bkey_extent_is_compressed(bkey_i_to_s_c(insert))) { + bch2_extent_is_compressed(bkey_i_to_s_c(insert))) { /* XXX: possibly need to increase our reservation? */ bch2_cut_subtract_back(s, s->committed, bkey_i_to_s(&split.k)); @@ -1152,46 +1176,24 @@ done: s->trans->did_work = true; } -static enum extent_insert_hook_ret +static enum btree_insert_ret __extent_insert_advance_pos(struct extent_insert_state *s, struct bpos next_pos, struct bkey_s_c k) { struct extent_insert_hook *hook = s->trans->hook; - enum extent_insert_hook_ret ret; -#if 0 - /* - * Currently disabled for encryption - broken with fcollapse. Will have - * to reenable when versions are exposed for send/receive - versions - * will have to be monotonic then: - */ - if (k.k && k.k->size && - !bversion_zero(s->insert->k->k.version) && - bversion_cmp(k.k->version, s->insert->k->k.version) > 0) { - ret = BTREE_HOOK_NO_INSERT; - } else -#endif + enum btree_insert_ret ret; + if (hook) ret = hook->fn(hook, s->committed, next_pos, k, s->insert->k); else - ret = BTREE_HOOK_DO_INSERT; + ret = BTREE_INSERT_OK; EBUG_ON(bkey_deleted(&s->insert->k->k) || !s->insert->k->k.size); - switch (ret) { - case BTREE_HOOK_DO_INSERT: - break; - case BTREE_HOOK_NO_INSERT: - extent_insert_committed(s); - bch2_cut_subtract_front(s, next_pos, bkey_i_to_s(s->insert->k)); - - bch2_btree_iter_set_pos_same_leaf(s->insert->iter, next_pos); - break; - case BTREE_HOOK_RESTART_TRANS: - return ret; - } + if (ret == BTREE_INSERT_OK) + s->committed = next_pos; - s->committed = next_pos; return ret; } @@ -1199,39 +1201,28 @@ __extent_insert_advance_pos(struct extent_insert_state *s, * Update iter->pos, marking how much of @insert we've processed, and call hook * fn: */ -static enum extent_insert_hook_ret +static enum btree_insert_ret extent_insert_advance_pos(struct extent_insert_state *s, struct bkey_s_c k) { struct btree *b = s->insert->iter->nodes[0]; struct bpos next_pos = bpos_min(s->insert->k->k.p, k.k ? 
k.k->p : b->key.k.p); + enum btree_insert_ret ret; + + if (race_fault()) + return BTREE_INSERT_NEED_TRAVERSE; /* hole? */ if (k.k && bkey_cmp(s->committed, bkey_start_pos(k.k)) < 0) { - bool have_uncommitted = bkey_cmp(s->committed, - bkey_start_pos(&s->insert->k->k)) > 0; - - switch (__extent_insert_advance_pos(s, bkey_start_pos(k.k), - bkey_s_c_null)) { - case BTREE_HOOK_DO_INSERT: - break; - case BTREE_HOOK_NO_INSERT: - /* - * we had to split @insert and insert the committed - * part - need to bail out and recheck journal - * reservation/btree node before we advance pos past @k: - */ - if (have_uncommitted) - return BTREE_HOOK_NO_INSERT; - break; - case BTREE_HOOK_RESTART_TRANS: - return BTREE_HOOK_RESTART_TRANS; - } + ret = __extent_insert_advance_pos(s, bkey_start_pos(k.k), + bkey_s_c_null); + if (ret != BTREE_INSERT_OK) + return ret; } /* avoid redundant calls to hook fn: */ if (!bkey_cmp(s->committed, next_pos)) - return BTREE_HOOK_DO_INSERT; + return BTREE_INSERT_OK; return __extent_insert_advance_pos(s, next_pos, k); } @@ -1245,7 +1236,7 @@ extent_insert_check_split_compressed(struct extent_insert_state *s, unsigned sectors; if (overlap == BCH_EXTENT_OVERLAP_MIDDLE && - (sectors = bkey_extent_is_compressed(k))) { + (sectors = bch2_extent_is_compressed(k))) { int flags = BCH_DISK_RESERVATION_BTREE_LOCKS_HELD; if (s->trans->flags & BTREE_INSERT_NOFAIL) @@ -1277,6 +1268,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, struct btree_iter *iter = s->insert->iter; struct btree *b = iter->nodes[0]; struct btree_node_iter *node_iter = &iter->node_iters[0]; + enum btree_insert_ret ret; switch (overlap) { case BCH_EXTENT_OVERLAP_FRONT: @@ -1322,9 +1314,9 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, k.k->p = orig_pos; extent_save(b, node_iter, _k, k.k); - if (extent_insert_advance_pos(s, k.s_c) == - BTREE_HOOK_RESTART_TRANS) - return BTREE_INSERT_NEED_TRAVERSE; + ret = extent_insert_advance_pos(s, k.s_c); + if (ret != BTREE_INSERT_OK) + return ret; extent_insert_committed(s); /* @@ -1420,15 +1412,9 @@ bch2_delete_fixup_extent(struct extent_insert_state *s) if (ret != BTREE_INSERT_OK) goto stop; - switch (extent_insert_advance_pos(s, k.s_c)) { - case BTREE_HOOK_DO_INSERT: - break; - case BTREE_HOOK_NO_INSERT: - continue; - case BTREE_HOOK_RESTART_TRANS: - ret = BTREE_INSERT_NEED_TRAVERSE; + ret = extent_insert_advance_pos(s, k.s_c); + if (ret) goto stop; - } s->do_journal = true; @@ -1469,10 +1455,9 @@ next: bch2_btree_iter_set_pos_same_leaf(iter, s->committed); } - if (bkey_cmp(s->committed, insert->k.p) < 0 && - ret == BTREE_INSERT_OK && - extent_insert_advance_pos(s, bkey_s_c_null) == BTREE_HOOK_RESTART_TRANS) - ret = BTREE_INSERT_NEED_TRAVERSE; + if (ret == BTREE_INSERT_OK && + bkey_cmp(s->committed, insert->k.p) < 0) + ret = extent_insert_advance_pos(s, bkey_s_c_null); stop: extent_insert_committed(s); @@ -1594,18 +1579,10 @@ bch2_insert_fixup_extent(struct btree_insert *trans, /* * Only call advance pos & call hook for nonzero size extents: - * If hook returned BTREE_HOOK_NO_INSERT, @insert->k no longer - * overlaps with @k: */ - switch (extent_insert_advance_pos(&s, k.s_c)) { - case BTREE_HOOK_DO_INSERT: - break; - case BTREE_HOOK_NO_INSERT: - continue; - case BTREE_HOOK_RESTART_TRANS: - ret = BTREE_INSERT_NEED_TRAVERSE; + ret = extent_insert_advance_pos(&s, k.s_c); + if (ret != BTREE_INSERT_OK) goto stop; - } if (k.k->size && (k.k->needs_whiteout || bset_written(b, bset(b, t)))) @@ -1623,10 +1600,9 @@ squash: goto stop; } - if 
(bkey_cmp(s.committed, insert->k->k.p) < 0 && - ret == BTREE_INSERT_OK && - extent_insert_advance_pos(&s, bkey_s_c_null) == BTREE_HOOK_RESTART_TRANS) - ret = BTREE_INSERT_NEED_TRAVERSE; + if (ret == BTREE_INSERT_OK && + bkey_cmp(s.committed, insert->k->k.p) < 0) + ret = extent_insert_advance_pos(&s, bkey_s_c_null); stop: extent_insert_committed(&s); /* @@ -1669,29 +1645,37 @@ static const char *bch2_extent_invalid(const struct bch_fs *c, case BCH_EXTENT_CACHED: { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; - const union bch_extent_crc *crc; + struct bch_extent_crc_unpacked crc; const struct bch_extent_ptr *ptr; unsigned size_ondisk = e.k->size; const char *reason; + unsigned nonce = UINT_MAX; extent_for_each_entry(e, entry) { if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) return "invalid extent entry type"; if (extent_entry_is_crc(entry)) { - crc = entry_to_crc(entry); + crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - if (crc_offset(crc) + e.k->size > - crc_uncompressed_size(e.k, crc)) + if (crc.offset + e.k->size > + crc.uncompressed_size) return "checksum offset + key size > uncompressed size"; - size_ondisk = crc_compressed_size(e.k, crc); + size_ondisk = crc.compressed_size; - if (!bch2_checksum_type_valid(c, crc_csum_type(crc))) + if (!bch2_checksum_type_valid(c, crc.csum_type)) return "invalid checksum type"; - if (crc_compression_type(crc) >= BCH_COMPRESSION_NR) + if (crc.compression_type >= BCH_COMPRESSION_NR) return "invalid compression type"; + + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; + else if (nonce != crc.offset + crc.nonce) + return "incorrect nonce"; + } } else { ptr = entry_to_ptr(entry); @@ -1864,102 +1848,75 @@ static unsigned PTR_TIER(struct bch_fs *c, } static void bch2_extent_crc_init(union bch_extent_crc *crc, - unsigned compressed_size, - unsigned uncompressed_size, - unsigned compression_type, - unsigned nonce, - struct bch_csum csum, unsigned csum_type) -{ - if (bch_crc_bytes[csum_type] <= 4 && - uncompressed_size <= CRC32_SIZE_MAX && - nonce <= CRC32_NONCE_MAX) { + struct bch_extent_crc_unpacked new) +{ +#define common_fields(_crc) \ + .csum_type = _crc.csum_type, \ + .compression_type = _crc.compression_type, \ + ._compressed_size = _crc.compressed_size - 1, \ + ._uncompressed_size = _crc.uncompressed_size - 1, \ + .offset = _crc.offset + + if (bch_crc_bytes[new.csum_type] <= 4 && + new.uncompressed_size <= CRC32_SIZE_MAX && + new.nonce <= CRC32_NONCE_MAX) { crc->crc32 = (struct bch_extent_crc32) { .type = 1 << BCH_EXTENT_ENTRY_crc32, - ._compressed_size = compressed_size - 1, - ._uncompressed_size = uncompressed_size - 1, - .offset = 0, - .compression_type = compression_type, - .csum_type = csum_type, - .csum = *((__le32 *) &csum.lo), + common_fields(new), + .csum = *((__le32 *) &new.csum.lo), }; return; } - if (bch_crc_bytes[csum_type] <= 10 && - uncompressed_size <= CRC64_SIZE_MAX && - nonce <= CRC64_NONCE_MAX) { + if (bch_crc_bytes[new.csum_type] <= 10 && + new.uncompressed_size <= CRC64_SIZE_MAX && + new.nonce <= CRC64_NONCE_MAX) { crc->crc64 = (struct bch_extent_crc64) { .type = 1 << BCH_EXTENT_ENTRY_crc64, - ._compressed_size = compressed_size - 1, - ._uncompressed_size = uncompressed_size - 1, - .offset = 0, - .nonce = nonce, - .compression_type = compression_type, - .csum_type = csum_type, - .csum_lo = csum.lo, - .csum_hi = *((__le16 *) &csum.hi), + common_fields(new), + .nonce = new.nonce, + .csum_lo = new.csum.lo, + 
.csum_hi = *((__le16 *) &new.csum.hi), }; return; } - if (bch_crc_bytes[csum_type] <= 16 && - uncompressed_size <= CRC128_SIZE_MAX && - nonce <= CRC128_NONCE_MAX) { + if (bch_crc_bytes[new.csum_type] <= 16 && + new.uncompressed_size <= CRC128_SIZE_MAX && + new.nonce <= CRC128_NONCE_MAX) { crc->crc128 = (struct bch_extent_crc128) { .type = 1 << BCH_EXTENT_ENTRY_crc128, - ._compressed_size = compressed_size - 1, - ._uncompressed_size = uncompressed_size - 1, - .offset = 0, - .nonce = nonce, - .compression_type = compression_type, - .csum_type = csum_type, - .csum = csum, + common_fields(new), + .nonce = new.nonce, + .csum = new.csum, }; return; } - +#undef common_fields BUG(); } void bch2_extent_crc_append(struct bkey_i_extent *e, - unsigned compressed_size, - unsigned uncompressed_size, - unsigned compression_type, - unsigned nonce, - struct bch_csum csum, unsigned csum_type) + struct bch_extent_crc_unpacked new) { - union bch_extent_crc *crc; + struct bch_extent_crc_unpacked crc; + const union bch_extent_entry *i; - BUG_ON(compressed_size > uncompressed_size); - BUG_ON(uncompressed_size != e->k.size); - BUG_ON(!compressed_size || !uncompressed_size); + BUG_ON(new.compressed_size > new.uncompressed_size); + BUG_ON(new.live_size != e->k.size); + BUG_ON(!new.compressed_size || !new.uncompressed_size); /* * Look up the last crc entry, so we can check if we need to add * another: */ - extent_for_each_crc(extent_i_to_s(e), crc) + extent_for_each_crc(extent_i_to_s(e), crc, i) ; - if (!crc && !csum_type && !compression_type) - return; - - if (crc && - crc_compressed_size(&e->k, crc) == compressed_size && - crc_uncompressed_size(&e->k, crc) == uncompressed_size && - crc_offset(crc) == 0 && - crc_nonce(crc) == nonce && - crc_csum_type(crc) == csum_type && - crc_compression_type(crc) == compression_type && - crc_csum(crc).lo == csum.lo && - crc_csum(crc).hi == csum.hi) + if (!memcmp(&crc, &new, sizeof(crc))) return; - bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), - compressed_size, - uncompressed_size, - compression_type, - nonce, csum, csum_type); + bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new); __extent_entry_push(e); } @@ -2011,16 +1968,22 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) } void bch2_extent_mark_replicas_cached(struct bch_fs *c, - struct bkey_s_extent e, - unsigned nr_cached) + struct bkey_s_extent e) { struct bch_extent_ptr *ptr; + unsigned tier = 0, nr_cached = 0, nr_good = 0; bool have_higher_tier; - unsigned tier = 0; - if (!nr_cached) + extent_for_each_ptr(e, ptr) + if (!ptr->cached && + c->devs[ptr->dev]->mi.state != BCH_MEMBER_STATE_FAILED) + nr_good++; + + if (nr_good <= c->opts.data_replicas) return; + nr_cached = nr_good - c->opts.data_replicas; + do { have_higher_tier = false; |
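
The bulk of this patch replaces direct manipulation of the packed union bch_extent_crc variants (crc32/crc64/crc128) with the decoded struct bch_extent_crc_unpacked, so logic such as can_narrow_crc() and bch2_extent_narrow_crcs() operates on plain fields instead of per-variant accessors. As a reading aid, here is a minimal standalone model of the narrowing eligibility check; the struct and the values in main() are hypothetical stand-ins for bch_extent_crc_unpacked, and the encryption-compatibility condition from the real can_narrow_crc() is only noted in a comment:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for struct bch_extent_crc_unpacked */
struct crc_model {
	unsigned	compressed_size;	/* sectors stored on disk */
	unsigned	uncompressed_size;	/* sectors covered by the checksum */
	unsigned	live_size;		/* sectors this key actually references */
	unsigned	offset;			/* start of live data in the checksummed region */
	unsigned	csum_type;		/* 0 = no checksum */
	unsigned	compression_type;	/* 0 = uncompressed */
};

/*
 * Mirrors the shape of can_narrow_crc(): an existing entry u may be
 * replaced by a narrower checksum n when u is uncompressed, has a
 * checksum, and covers more data than the key keeps live.  The real
 * helper additionally requires that u and n agree on whether their
 * checksum types are encryption types.
 */
static bool can_narrow(struct crc_model u, struct crc_model n)
{
	return !u.compression_type &&
		u.csum_type &&
		u.uncompressed_size > u.live_size;
}

int main(void)
{
	/* a key referencing 8 live sectors out of a 128-sector checksummed region */
	struct crc_model wide   = { .compressed_size = 128, .uncompressed_size = 128,
				    .live_size = 8, .offset = 16, .csum_type = 1 };
	/* a freshly computed checksum covering only the 8 live sectors */
	struct crc_model narrow = { .compressed_size = 8, .uncompressed_size = 8,
				    .live_size = 8, .csum_type = 1 };

	printf("can narrow: %s\n", can_narrow(wide, narrow) ? "yes" : "no");
	return 0;
}

Narrowing matters because, as the in-tree comment above explains, a key covering only part of a checksummed region forces readers to fetch and checksum the whole region (bouncing the extra data); once a checksum of just the live range is available, bch2_extent_narrow_crcs() appends it and rewrites the pointers to reference only that range.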
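Separately, extent_pick_read_device() drops the tier comparison in favor of the new dev_latency_better(), which picks between two candidate devices at random with odds weighted by their observed read latencies. A hedged, self-contained model of that bias follows; rand_range() stands in for bch2_rand_range(), and the latencies and trial count are invented for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for bch2_rand_range(): uniform value in [0, max) */
static unsigned rand_range(unsigned max)
{
	return max ? (unsigned) rand() % max : 0;
}

/* mirrors dev_latency_better(): true means the device with latency l1 wins */
static bool latency_better(unsigned l1, unsigned l2)
{
	return rand_range(l1 + l2) > l1;
}

int main(void)
{
	unsigned l1 = 100, l2 = 300;	/* hypothetical read latencies */
	unsigned wins = 0, trials = 100000;

	for (unsigned i = 0; i < trials; i++)
		wins += latency_better(l1, l2);

	/* expect the faster device to win roughly l2 / (l1 + l2) = 75% of picks */
	printf("faster device picked %u of %u times\n", wins, trials);
	return 0;
}

Keeping the choice probabilistic rather than always taking the lowest-latency device presumably leaves some reads on the slower device, so its latency estimate continues to be refreshed instead of going stale.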