diff options
Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r-- | libbcachefs/extents.c | 414 |
1 files changed, 227 insertions, 187 deletions
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c index f8f29251..dffcc144 100644 --- a/libbcachefs/extents.c +++ b/libbcachefs/extents.c @@ -500,43 +500,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) } } -static const char *extent_ptr_invalid(const struct bch_fs *c, - struct bkey_s_c k, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata) -{ - struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); - const struct bch_extent_ptr *ptr2; - struct bch_dev *ca; - - if (ptr->dev >= c->sb.nr_devices || - !c->devs[ptr->dev]) - return "pointer to invalid device"; - - ca = bch_dev_bkey_exists(c, ptr->dev); - if (!ca) - return "pointer to invalid device"; - - bkey_for_each_ptr(ptrs, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) - return "multiple pointers to same device"; - - if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) - return "offset past end of device"; - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) - return "offset before first bucket"; - - if (bucket_remainder(ca, ptr->offset) + - size_ondisk > ca->mi.bucket_size) - return "spans multiple buckets"; - - return NULL; -} - -static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c k) +void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -590,37 +555,109 @@ static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } -/* Btree ptrs */ +static const char *extent_ptr_invalid(const struct bch_fs *c, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr2; + struct bch_dev *ca; -const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) + if (!bch2_dev_exists2(c, ptr->dev)) + return "pointer to invalid device"; + + ca = bch_dev_bkey_exists(c, ptr->dev); + if (!ca) + return "pointer to invalid device"; + + bkey_for_each_ptr(ptrs, ptr2) + if (ptr != ptr2 && ptr->dev == ptr2->dev) + return "multiple pointers to same device"; + + if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) + return "offset past end of device"; + + if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) + return "offset before first bucket"; + + if (bucket_remainder(ca, ptr->offset) + + size_ondisk > ca->mi.bucket_size) + return "spans multiple buckets"; + + return NULL; +} + +const char *bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - const struct bch_extent_ptr *ptr; + struct bch_extent_crc_unpacked crc; + unsigned size_ondisk = k.k->size; const char *reason; + unsigned nonce = UINT_MAX; - if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) - return "value too big"; + if (k.k->type == KEY_TYPE_btree_ptr) + size_ondisk = c->opts.btree_node_size; bkey_extent_entry_for_each(ptrs, entry) { if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) return "invalid extent entry type"; - if (!extent_entry_is_ptr(entry)) + if (k.k->type == KEY_TYPE_btree_ptr && + !extent_entry_is_ptr(entry)) return "has non ptr field"; - } - bkey_for_each_ptr(ptrs, ptr) { - reason = extent_ptr_invalid(c, k, ptr, - c->opts.btree_node_size, - true); - if (reason) - return reason; + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + reason = extent_ptr_invalid(c, k, &entry->ptr, + size_ondisk, false); + if (reason) + return reason; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); + + if (crc.offset + crc.live_size > + crc.uncompressed_size) + return "checksum offset + key size > uncompressed size"; + + size_ondisk = crc.compressed_size; + + if (!bch2_checksum_type_valid(c, crc.csum_type)) + return "invalid checksum type"; + + if (crc.compression_type >= BCH_COMPRESSION_NR) + return "invalid compression type"; + + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; + else if (nonce != crc.offset + crc.nonce) + return "incorrect nonce"; + } + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + break; + } } return NULL; } +/* Btree ptrs */ + +const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) + return "value too big"; + + return bch2_bkey_ptrs_invalid(c, k); +} + void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { @@ -665,13 +702,7 @@ err: void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - const char *invalid; - - bkey_ptrs_to_text(out, c, k); - - invalid = bch2_btree_ptr_invalid(c, k); - if (invalid) - pr_buf(out, " invalid: %s", invalid); + bch2_bkey_ptrs_to_text(out, c, k); } /* Extents */ @@ -1260,60 +1291,10 @@ void bch2_insert_fixup_extent(struct btree_trans *trans, const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - unsigned size_ondisk = e.k->size; - const char *reason; - unsigned nonce = UINT_MAX; - - if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX) + if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX) return "value too big"; - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; - - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - - reason = extent_ptr_invalid(c, e.s_c, &entry->ptr, - size_ondisk, false); - if (reason) - return reason; - break; - case BCH_EXTENT_ENTRY_crc32: - case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - - if (crc.offset + e.k->size > - crc.uncompressed_size) - return "checksum offset + key size > uncompressed size"; - - size_ondisk = crc.compressed_size; - - if (!bch2_checksum_type_valid(c, crc.csum_type)) - return "invalid checksum type"; - - if (crc.compression_type >= BCH_COMPRESSION_NR) - return "invalid compression type"; - - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) - return "incorrect nonce"; - } - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; - } - } - - return NULL; + return bch2_bkey_ptrs_invalid(c, k); } void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, @@ -1374,62 +1355,66 @@ void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - const char *invalid; + bch2_bkey_ptrs_to_text(out, c, k); +} - bkey_ptrs_to_text(out, c, k); +static unsigned bch2_crc_field_size_max[] = { + [BCH_EXTENT_ENTRY_crc32] = CRC32_SIZE_MAX, + [BCH_EXTENT_ENTRY_crc64] = CRC64_SIZE_MAX, + [BCH_EXTENT_ENTRY_crc128] = CRC128_SIZE_MAX, +}; - invalid = bch2_extent_invalid(c, k); - if (invalid) - pr_buf(out, " invalid: %s", invalid); +static void bch2_extent_crc_pack(union bch_extent_crc *dst, + struct bch_extent_crc_unpacked src) +{ +#define set_common_fields(_dst, _src) \ + _dst.csum_type = _src.csum_type, \ + _dst.compression_type = _src.compression_type, \ + _dst._compressed_size = _src.compressed_size - 1, \ + _dst._uncompressed_size = _src.uncompressed_size - 1, \ + _dst.offset = _src.offset + + switch (extent_entry_type(to_entry(dst))) { + case BCH_EXTENT_ENTRY_crc32: + set_common_fields(dst->crc32, src); + dst->crc32.csum = *((__le32 *) &src.csum.lo); + break; + case BCH_EXTENT_ENTRY_crc64: + set_common_fields(dst->crc64, src); + dst->crc64.nonce = src.nonce; + dst->crc64.csum_lo = src.csum.lo; + dst->crc64.csum_hi = *((__le16 *) &src.csum.hi); + break; + case BCH_EXTENT_ENTRY_crc128: + set_common_fields(dst->crc128, src); + dst->crc128.nonce = src.nonce; + dst->crc128.csum = src.csum; + break; + default: + BUG(); + } +#undef set_common_fields } static void bch2_extent_crc_init(union bch_extent_crc *crc, struct bch_extent_crc_unpacked new) { -#define common_fields(_crc) \ - .csum_type = _crc.csum_type, \ - .compression_type = _crc.compression_type, \ - ._compressed_size = _crc.compressed_size - 1, \ - ._uncompressed_size = _crc.uncompressed_size - 1, \ - .offset = _crc.offset - if (bch_crc_bytes[new.csum_type] <= 4 && - new.uncompressed_size <= CRC32_SIZE_MAX && - new.nonce <= CRC32_NONCE_MAX) { - crc->crc32 = (struct bch_extent_crc32) { - .type = 1 << BCH_EXTENT_ENTRY_crc32, - common_fields(new), - .csum = *((__le32 *) &new.csum.lo), - }; - return; - } - - if (bch_crc_bytes[new.csum_type] <= 10 && - new.uncompressed_size <= CRC64_SIZE_MAX && - new.nonce <= CRC64_NONCE_MAX) { - crc->crc64 = (struct bch_extent_crc64) { - .type = 1 << BCH_EXTENT_ENTRY_crc64, - common_fields(new), - .nonce = new.nonce, - .csum_lo = new.csum.lo, - .csum_hi = *((__le16 *) &new.csum.hi), - }; - return; - } + new.uncompressed_size - 1 <= CRC32_SIZE_MAX && + new.nonce <= CRC32_NONCE_MAX) + crc->type = 1 << BCH_EXTENT_ENTRY_crc32; + else if (bch_crc_bytes[new.csum_type] <= 10 && + new.uncompressed_size - 1 <= CRC64_SIZE_MAX && + new.nonce <= CRC64_NONCE_MAX) + crc->type = 1 << BCH_EXTENT_ENTRY_crc64; + else if (bch_crc_bytes[new.csum_type] <= 16 && + new.uncompressed_size - 1 <= CRC128_SIZE_MAX && + new.nonce <= CRC128_NONCE_MAX) + crc->type = 1 << BCH_EXTENT_ENTRY_crc128; + else + BUG(); - if (bch_crc_bytes[new.csum_type] <= 16 && - new.uncompressed_size <= CRC128_SIZE_MAX && - new.nonce <= CRC128_NONCE_MAX) { - crc->crc128 = (struct bch_extent_crc128) { - .type = 1 << BCH_EXTENT_ENTRY_crc128, - common_fields(new), - .nonce = new.nonce, - .csum = new.csum, - }; - return; - } -#undef common_fields - BUG(); + bch2_extent_crc_pack(crc, new); } void bch2_extent_crc_append(struct bkey_i_extent *e, @@ -1454,10 +1439,15 @@ static inline void __extent_entry_insert(struct bkey_i_extent *e, void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e, struct extent_ptr_decoded *p) { - struct bch_extent_crc_unpacked crc; + struct bch_extent_crc_unpacked crc = bch2_extent_crc_unpack(&e->k, NULL); union bch_extent_entry *pos; unsigned i; + if (!bch2_crc_unpacked_cmp(crc, p->crc)) { + pos = e->v.start; + goto found; + } + extent_for_each_crc(extent_i_to_s(e), crc, pos) if (!bch2_crc_unpacked_cmp(crc, p->crc)) { pos = extent_entry_next(pos); @@ -1535,46 +1525,101 @@ enum merge_result bch2_extent_merge(struct bch_fs *c, { struct bkey_s_extent el = bkey_i_to_s_extent(l); struct bkey_s_extent er = bkey_i_to_s_extent(r); - union bch_extent_entry *en_l, *en_r; + union bch_extent_entry *en_l = el.v->start; + union bch_extent_entry *en_r = er.v->start; + struct bch_extent_crc_unpacked crc_l, crc_r; if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k)) return BCH_MERGE_NOMERGE; - extent_for_each_entry(el, en_l) { - struct bch_extent_ptr *lp, *rp; - struct bch_dev *ca; + crc_l = bch2_extent_crc_unpack(el.k, NULL); + extent_for_each_entry(el, en_l) { en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - if ((extent_entry_type(en_l) != - extent_entry_type(en_r)) || - !extent_entry_is_ptr(en_l)) + if (extent_entry_type(en_l) != extent_entry_type(en_r)) return BCH_MERGE_NOMERGE; - lp = &en_l->ptr; - rp = &en_r->ptr; + switch (extent_entry_type(en_l)) { + case BCH_EXTENT_ENTRY_ptr: { + const struct bch_extent_ptr *lp = &en_l->ptr; + const struct bch_extent_ptr *rp = &en_r->ptr; + struct bch_dev *ca; - if (lp->offset + el.k->size != rp->offset || - lp->dev != rp->dev || - lp->gen != rp->gen) - return BCH_MERGE_NOMERGE; + if (lp->offset + crc_l.compressed_size != rp->offset || + lp->dev != rp->dev || + lp->gen != rp->gen) + return BCH_MERGE_NOMERGE; + + /* We don't allow extents to straddle buckets: */ + ca = bch_dev_bkey_exists(c, lp->dev); - /* We don't allow extents to straddle buckets: */ - ca = bch_dev_bkey_exists(c, lp->dev); + if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) + return BCH_MERGE_NOMERGE; - if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) + break; + } + case BCH_EXTENT_ENTRY_stripe_ptr: + if (en_l->stripe_ptr.block != en_r->stripe_ptr.block || + en_l->stripe_ptr.idx != en_r->stripe_ptr.idx) + return BCH_MERGE_NOMERGE; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l)); + crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r)); + + if (crc_l.csum_type != crc_r.csum_type || + crc_l.compression_type != crc_r.compression_type || + crc_l.nonce != crc_r.nonce) + return BCH_MERGE_NOMERGE; + + if (crc_l.offset + crc_l.live_size != crc_l.compressed_size || + crc_r.offset) + return BCH_MERGE_NOMERGE; + + if (!bch2_checksum_mergeable(crc_l.csum_type)) + return BCH_MERGE_NOMERGE; + + if (crc_l.compression_type) + return BCH_MERGE_NOMERGE; + + if (crc_l.csum_type && + crc_l.uncompressed_size + + crc_r.uncompressed_size > c->sb.encoded_extent_max) + return BCH_MERGE_NOMERGE; + + if (crc_l.uncompressed_size + crc_r.uncompressed_size - 1 > + bch2_crc_field_size_max[extent_entry_type(en_l)]) + return BCH_MERGE_NOMERGE; + + break; + default: return BCH_MERGE_NOMERGE; + } } - l->k.needs_whiteout |= r->k.needs_whiteout; + extent_for_each_entry(el, en_l) { + struct bch_extent_crc_unpacked crc_l, crc_r; - /* Keys with no pointers aren't restricted to one bucket and could - * overflow KEY_SIZE - */ - if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { - bch2_key_resize(&l->k, KEY_SIZE_MAX); - bch2_cut_front(l->k.p, r); - return BCH_MERGE_PARTIAL; + en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); + + if (!extent_entry_is_crc(en_l)) + continue; + + crc_l = bch2_extent_crc_unpack(el.k, entry_to_crc(en_l)); + crc_r = bch2_extent_crc_unpack(er.k, entry_to_crc(en_r)); + + crc_l.csum = bch2_checksum_merge(crc_l.csum_type, + crc_l.csum, + crc_r.csum, + crc_r.uncompressed_size << 9); + + crc_l.uncompressed_size += crc_r.uncompressed_size; + crc_l.compressed_size += crc_r.compressed_size; + + bch2_extent_crc_pack(entry_to_crc(en_l), crc_l); } bch2_key_resize(&l->k, l->k.size + r->k.size); @@ -1670,7 +1715,7 @@ bool bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size, end.offset += size; - bch2_trans_init(&trans, c); + bch2_trans_init(&trans, c, 0, 0); for_each_btree_key(&trans, iter, BTREE_ID_EXTENTS, pos, BTREE_ITER_SLOTS, k, err) { @@ -1745,11 +1790,6 @@ enum merge_result bch2_reservation_merge(struct bch_fs *c, li->v.nr_replicas != ri->v.nr_replicas) return BCH_MERGE_NOMERGE; - l->k.needs_whiteout |= r->k.needs_whiteout; - - /* Keys with no pointers aren't restricted to one bucket and could - * overflow KEY_SIZE - */ if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { bch2_key_resize(&l->k, KEY_SIZE_MAX); bch2_cut_front(l->k.p, r); |