Diffstat (limited to 'libbcachefs/extents.c')
-rw-r--r--  libbcachefs/extents.c | 332
1 file changed, 148 insertions(+), 184 deletions(-)
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a4d7e52b..6eaa89c9 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -88,7 +88,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
 
 	memset(&nr, 0, sizeof(nr));
 
-	heap_resort(iter, key_sort_cmp);
+	heap_resort(iter, key_sort_cmp, NULL);
 
 	while (!bch2_btree_node_iter_large_end(iter)) {
 		if (!should_drop_next_key(iter, b)) {
@@ -101,7 +101,7 @@ struct btree_nr_keys bch2_key_sort_fix_overlapping(struct bset *dst,
 		}
 
 		sort_key_next(iter, b, iter->data);
-		heap_sift_down(iter, 0, key_sort_cmp);
+		heap_sift_down(iter, 0, key_sort_cmp, NULL);
 	}
 
 	dst->u64s = cpu_to_le16((u64 *) out - dst->_data);
@@ -122,20 +122,11 @@ bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev)
 	return NULL;
 }
 
-bool bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
+void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev)
 {
 	struct bch_extent_ptr *ptr;
-	bool dropped = false;
 
-	extent_for_each_ptr_backwards(e, ptr)
-		if (ptr->dev == dev) {
-			__bch2_extent_drop_ptr(e, ptr);
-			dropped = true;
-		}
-
-	if (dropped)
-		bch2_extent_drop_redundant_crcs(e);
-	return dropped;
+	bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev);
 }
 
 const struct bch_extent_ptr *
@@ -231,21 +222,21 @@ unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e)
 
 unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 {
-	struct bkey_s_c_extent e;
-	const struct bch_extent_ptr *ptr;
-	struct bch_extent_crc_unpacked crc;
 	unsigned ret = 0;
 
 	switch (k.k->type) {
 	case BCH_EXTENT:
-	case BCH_EXTENT_CACHED:
-		e = bkey_s_c_to_extent(k);
+	case BCH_EXTENT_CACHED: {
+		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
 
-		extent_for_each_ptr_crc(e, ptr, crc)
-			if (!ptr->cached &&
-			    crc.compression_type != BCH_COMPRESSION_NONE &&
-			    crc.compressed_size < crc.live_size)
-				ret = max_t(unsigned, ret, crc.compressed_size);
+		extent_for_each_ptr_decode(e, p, entry)
+			if (!p.ptr.cached &&
+			    p.crc.compression_type != BCH_COMPRESSION_NONE &&
+			    p.crc.compressed_size < p.crc.live_size)
+				ret = max_t(unsigned, ret, p.crc.compressed_size);
+	}
 	}
 
 	return ret;
@@ -254,34 +245,50 @@ unsigned bch2_extent_is_compressed(struct bkey_s_c k)
 bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e,
 			     struct bch_extent_ptr m, u64 offset)
 {
-	const struct bch_extent_ptr *ptr;
-	struct bch_extent_crc_unpacked crc;
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
 
-	extent_for_each_ptr_crc(e, ptr, crc)
-		if (ptr->dev == m.dev &&
-		    ptr->gen == m.gen &&
-		    (s64) ptr->offset + crc.offset - bkey_start_offset(e.k) ==
+	extent_for_each_ptr_decode(e, p, entry)
+		if (p.ptr.dev == m.dev &&
+		    p.ptr.gen == m.gen &&
+		    (s64) p.ptr.offset + p.crc.offset - bkey_start_offset(e.k) ==
 		    (s64) m.offset - offset)
-			return ptr;
+			return true;
 
-	return NULL;
+	return false;
 }
 
-/* Doesn't cleanup redundant crcs */
-void __bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
+union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e,
+					     struct bch_extent_ptr *ptr)
 {
+	union bch_extent_entry *dst;
+	union bch_extent_entry *src;
+
 	EBUG_ON(ptr < &e.v->start->ptr ||
 		ptr >= &extent_entry_last(e)->ptr);
 	EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
-	memmove_u64s_down(ptr, ptr + 1,
-			  (u64 *) extent_entry_last(e) - (u64 *) (ptr + 1));
-	e.k->u64s -= sizeof(*ptr) / sizeof(u64);
-}
 
-void bch2_extent_drop_ptr(struct bkey_s_extent e, struct bch_extent_ptr *ptr)
-{
-	__bch2_extent_drop_ptr(e, ptr);
-	bch2_extent_drop_redundant_crcs(e);
+	src = to_entry(ptr + 1);
+
+	if (src != extent_entry_last(e) &&
+	    extent_entry_type(src) == BCH_EXTENT_ENTRY_ptr) {
+		dst = to_entry(ptr);
+	} else {
+		extent_for_each_entry(e, dst) {
+			if (dst == to_entry(ptr))
+				break;
+
+			if (extent_entry_next(dst) == to_entry(ptr) &&
+			    extent_entry_is_crc(dst))
+				break;
+		}
+	}
+
+	memmove_u64s_down(dst, src,
+			  (u64 *) extent_entry_last(e) - (u64 *) src);
+	e.k->u64s -= (u64 *) src - (u64 *) dst;
+
+	return dst;
 }
 
 static inline bool can_narrow_crc(struct bch_extent_crc_unpacked u,
@@ -323,38 +330,38 @@ bool bch2_extent_narrow_crcs(struct bkey_i_extent *e,
 			     struct bch_extent_crc_unpacked n)
 {
 	struct bch_extent_crc_unpacked u;
-	struct bch_extent_ptr *ptr;
+	struct extent_ptr_decoded p;
 	union bch_extent_entry *i;
+	bool ret = false;
 
 	/* Find a checksum entry that covers only live data: */
-	if (!n.csum_type)
+	if (!n.csum_type) {
 		extent_for_each_crc(extent_i_to_s(e), u, i)
 			if (!u.compression_type &&
 			    u.csum_type &&
 			    u.live_size == u.uncompressed_size) {
 				n = u;
-				break;
+				goto found;
 			}
-
-	if (!bch2_can_narrow_extent_crcs(extent_i_to_s_c(e), n))
 		return false;
-
+	}
+found:
 	BUG_ON(n.compression_type);
 	BUG_ON(n.offset);
 	BUG_ON(n.live_size != e->k.size);
 
-	bch2_extent_crc_append(e, n);
 restart_narrow_pointers:
-	extent_for_each_ptr_crc(extent_i_to_s(e), ptr, u)
-		if (can_narrow_crc(u, n)) {
-			ptr->offset += u.offset;
-			extent_ptr_append(e, *ptr);
-			__bch2_extent_drop_ptr(extent_i_to_s(e), ptr);
+	extent_for_each_ptr_decode(extent_i_to_s(e), p, i)
+		if (can_narrow_crc(p.crc, n)) {
+			bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr);
+			p.ptr.offset += p.crc.offset;
+			p.crc = n;
+			bch2_extent_ptr_decoded_append(e, &p);
+			ret = true;
 			goto restart_narrow_pointers;
 		}
 
-	bch2_extent_drop_redundant_crcs(extent_i_to_s(e));
-	return true;
+	return ret;
 }
 
 /* returns true if not equal */
@@ -371,87 +378,13 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l,
 		bch2_crc_cmp(l.csum, r.csum));
 }
 
-void bch2_extent_drop_redundant_crcs(struct bkey_s_extent e)
-{
-	union bch_extent_entry *entry = e.v->start;
-	union bch_extent_crc *crc, *prev = NULL;
-	struct bch_extent_crc_unpacked u, prev_u = { 0 };
-
-	while (entry != extent_entry_last(e)) {
-		union bch_extent_entry *next = extent_entry_next(entry);
-		size_t crc_u64s = extent_entry_u64s(entry);
-
-		if (!extent_entry_is_crc(entry))
-			goto next;
-
-		crc = entry_to_crc(entry);
-		u = bch2_extent_crc_unpack(e.k, crc);
-
-		if (next == extent_entry_last(e)) {
-			/* crc entry with no pointers after it: */
-			goto drop;
-		}
-
-		if (extent_entry_is_crc(next)) {
-			/* no pointers before next crc entry: */
-			goto drop;
-		}
-
-		if (prev && !bch2_crc_unpacked_cmp(u, prev_u)) {
-			/* identical to previous crc entry: */
-			goto drop;
-		}
-
-		if (!prev &&
-		    !u.csum_type &&
-		    !u.compression_type) {
-			/* null crc entry: */
-			union bch_extent_entry *e2;
-
-			extent_for_each_entry_from(e, e2, extent_entry_next(entry)) {
-				if (!extent_entry_is_ptr(e2))
-					break;
-
-				e2->ptr.offset += u.offset;
-			}
-			goto drop;
-		}
-
-		prev = crc;
-		prev_u = u;
-next:
-		entry = next;
-		continue;
-drop:
-		memmove_u64s_down(crc, next,
-				  (u64 *) extent_entry_last(e) - (u64 *) next);
-		e.k->u64s -= crc_u64s;
-	}
-
-	EBUG_ON(bkey_val_u64s(e.k) && !bch2_extent_nr_ptrs(e.c));
-}
-
-static bool should_drop_ptr(const struct bch_fs *c,
-			    struct bkey_s_c_extent e,
-			    const struct bch_extent_ptr *ptr)
-{
-	return ptr->cached && ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr);
-}
-
 static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e)
 {
-	struct bch_extent_ptr *ptr = &e.v->start->ptr;
-	bool dropped = false;
-
-	while ((ptr = extent_ptr_next(e, ptr)))
-		if (should_drop_ptr(c, e.c, ptr)) {
-			__bch2_extent_drop_ptr(e, ptr);
-			dropped = true;
-		} else
-			ptr++;
+	struct bch_extent_ptr *ptr;
 
-	if (dropped)
-		bch2_extent_drop_redundant_crcs(e);
+	bch2_extent_drop_ptrs(e, ptr,
+		ptr->cached &&
+		ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr));
 }
 
 bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k)
@@ -475,6 +408,8 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
 		     entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k));
 		     entry = extent_entry_next(entry)) {
 			switch (extent_entry_type(entry)) {
+			case BCH_EXTENT_ENTRY_ptr:
+				break;
 			case BCH_EXTENT_ENTRY_crc32:
 				entry->crc32.csum = swab32(entry->crc32.csum);
 				break;
@@ -488,8 +423,6 @@ void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k)
 				entry->crc128.csum.lo = (__force __le64)
 					swab64((__force u64) entry->crc128.csum.lo);
 				break;
-			case BCH_EXTENT_ENTRY_ptr:
-				break;
 			}
 		}
 		break;
@@ -586,12 +519,45 @@ out:
 	return out - buf;
 }
 
-static inline bool dev_latency_better(struct bch_fs *c,
-				      const struct bch_extent_ptr *ptr1,
-				      const struct bch_extent_ptr *ptr2)
+static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f,
+						   unsigned dev)
+{
+	struct bch_dev_io_failures *i;
+
+	for (i = f->devs; i < f->devs + f->nr; i++)
+		if (i->dev == dev)
+			return i;
+
+	return NULL;
+}
+
+void bch2_mark_io_failure(struct bch_io_failures *failed,
+			  struct extent_ptr_decoded *p)
+{
+	struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev);
+
+	if (!f) {
+		BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs));
+
+		f = &failed->devs[failed->nr++];
+		f->dev = p->ptr.dev;
+		f->nr_failed = 1;
+		f->nr_retries = 0;
+	} else {
+		f->nr_failed++;
+	}
+}
+
+/*
+ * returns true if p1 is better than p2:
+ */
+static inline bool ptr_better(struct bch_fs *c,
+			      const struct extent_ptr_decoded p1,
+			      const struct extent_ptr_decoded p2)
 {
-	struct bch_dev *dev1 = bch_dev_bkey_exists(c, ptr1->dev);
-	struct bch_dev *dev2 = bch_dev_bkey_exists(c, ptr2->dev);
+	struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev);
+	struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev);
+
 	u64 l1 = atomic64_read(&dev1->cur_latency[READ]);
 	u64 l2 = atomic64_read(&dev2->cur_latency[READ]);
@@ -602,31 +568,29 @@ static inline bool dev_latency_better(struct bch_fs *c,
 
 static int extent_pick_read_device(struct bch_fs *c,
 				   struct bkey_s_c_extent e,
-				   struct bch_devs_mask *avoid,
-				   struct extent_pick_ptr *pick)
+				   struct bch_io_failures *failed,
+				   struct extent_ptr_decoded *pick)
 {
-	const struct bch_extent_ptr *ptr;
-	struct bch_extent_crc_unpacked crc;
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
+	struct bch_dev_io_failures *f;
 	struct bch_dev *ca;
 	int ret = 0;
 
-	extent_for_each_ptr_crc(e, ptr, crc) {
-		ca = bch_dev_bkey_exists(c, ptr->dev);
+	extent_for_each_ptr_decode(e, p, entry) {
+		ca = bch_dev_bkey_exists(c, p.ptr.dev);
 
-		if (ptr->cached && ptr_stale(ca, ptr))
+		if (p.ptr.cached && ptr_stale(ca, &p.ptr))
 			continue;
 
-		if (avoid && test_bit(ptr->dev, avoid->d))
+		f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL;
+		if (f && f->nr_failed >= f->nr_retries)
 			continue;
 
-		if (ret && !dev_latency_better(c, ptr, &pick->ptr))
+		if (ret && !ptr_better(c, p, *pick))
 			continue;
 
-		*pick = (struct extent_pick_ptr) {
-			.ptr = *ptr,
-			.crc = crc,
-		};
-
+		*pick = p;
 		ret = 1;
 	}
@@ -715,7 +679,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
 		goto err;
 	}
 
-	if (!bch2_bkey_replicas_marked(c, BCH_DATA_BTREE, e.s_c)) {
+	if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
 		bch2_bkey_val_to_text(c, btree_node_type(b),
 				      buf, sizeof(buf), k);
 		bch2_fs_bug(c,
@@ -752,11 +716,11 @@ int bch2_btree_ptr_to_text(struct bch_fs *c, char *buf,
 
 int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b,
-			struct bch_devs_mask *avoid,
-			struct extent_pick_ptr *pick)
+			struct bch_io_failures *failed,
+			struct extent_ptr_decoded *pick)
 {
 	return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key),
-				       avoid, pick);
+				       failed, pick);
 }
 
 /* Extents */
@@ -908,7 +872,7 @@ static bool extent_i_save(struct btree *b, struct bkey_packed *dst,
 static inline void extent_sort_sift(struct btree_node_iter_large *iter,
 				    struct btree *b, size_t i)
 {
-	heap_sift_down(iter, i, extent_sort_cmp);
+	heap_sift_down(iter, i, extent_sort_cmp, NULL);
 }
 
 static inline void extent_sort_next(struct btree_node_iter_large *iter,
@@ -916,7 +880,7 @@ static inline void extent_sort_next(struct btree_node_iter_large *iter,
 					struct btree_node_iter_set *i)
 {
 	sort_key_next(iter, b, i);
-	heap_sift_down(iter, i - iter->data, extent_sort_cmp);
+	heap_sift_down(iter, i - iter->data, extent_sort_cmp, NULL);
 }
 
 static void extent_sort_append(struct bch_fs *c,
@@ -964,7 +928,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c,
 
 	memset(&nr, 0, sizeof(nr));
 
-	heap_resort(iter, extent_sort_cmp);
+	heap_resort(iter, extent_sort_cmp, NULL);
 
 	while (!bch2_btree_node_iter_large_end(iter)) {
 		lk = __btree_node_offset_to_key(b, _l->k);
@@ -1076,8 +1040,9 @@ static void bch2_add_sectors(struct extent_insert_state *s,
 	if (!sectors)
 		return;
 
-	bch2_mark_key(c, k, sectors, BCH_DATA_USER, gc_pos_btree_node(b),
-		      &s->stats, s->trans->journal_res.seq, 0);
+	bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
+		      gc_pos_btree_node(b), &s->stats,
+		      s->trans->journal_res.seq, 0);
 }
 
 static void bch2_subtract_sectors(struct extent_insert_state *s,
@@ -1748,8 +1713,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
 		return;
 	}
 
-	if (!bkey_extent_is_cached(e.k) &&
-	    !bch2_bkey_replicas_marked(c, BCH_DATA_USER, e.s_c)) {
+	if (!bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
 		bch2_bkey_val_to_text(c, btree_node_type(b), buf,
 				      sizeof(buf), e.s_c);
 		bch2_fs_bug(c,
@@ -1853,25 +1817,25 @@ static void bch2_extent_crc_init(union bch_extent_crc *crc,
 void bch2_extent_crc_append(struct bkey_i_extent *e,
 			    struct bch_extent_crc_unpacked new)
 {
-	struct bch_extent_crc_unpacked crc;
-	const union bch_extent_entry *i;
-
-	BUG_ON(new.compressed_size > new.uncompressed_size);
-	BUG_ON(new.live_size != e->k.size);
-	BUG_ON(!new.compressed_size || !new.uncompressed_size);
+	bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
+	__extent_entry_push(e);
+}
 
-	/*
-	 * Look up the last crc entry, so we can check if we need to add
-	 * another:
-	 */
-	extent_for_each_crc(extent_i_to_s(e), crc, i)
-		;
+void bch2_extent_ptr_decoded_append(struct bkey_i_extent *e,
+				    struct extent_ptr_decoded *p)
+{
+	struct bch_extent_crc_unpacked crc;
+	union bch_extent_entry *pos;
 
-	if (!bch2_crc_unpacked_cmp(crc, new))
-		return;
+	extent_for_each_crc(extent_i_to_s(e), crc, pos)
+		if (!bch2_crc_unpacked_cmp(crc, p->crc))
+			goto found;
 
-	bch2_extent_crc_init((void *) extent_entry_last(extent_i_to_s(e)), new);
-	__extent_entry_push(e);
+	bch2_extent_crc_append(e, p->crc);
+	pos = extent_entry_last(extent_i_to_s(e));
+found:
+	p->ptr.type = 1 << BCH_EXTENT_ENTRY_ptr;
+	__extent_entry_insert(e, pos, to_entry(&p->ptr));
 }
 
 /*
@@ -1957,8 +1921,8 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c,
  * other devices, it will still pick a pointer from avoid.
  */
 int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
-			 struct bch_devs_mask *avoid,
-			 struct extent_pick_ptr *pick)
+			 struct bch_io_failures *failed,
+			 struct extent_ptr_decoded *pick)
 {
 	int ret;
 
@@ -1969,7 +1933,7 @@ int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k,
 	case BCH_EXTENT:
 	case BCH_EXTENT_CACHED:
 		ret = extent_pick_read_device(c, bkey_s_c_to_extent(k),
-					      avoid, pick);
+					      failed, pick);
 
 		if (!ret && !bkey_extent_is_cached(k.k))
 			ret = -EIO;
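
The most substantial change in this patch is the replacement of the one-shot struct bch_devs_mask *avoid bitmask with struct bch_io_failures bookkeeping: rather than permanently excluding a device after a failed read, the read path now records a per-device failure count (bch2_mark_io_failure) and extent_pick_read_device skips a device only while f->nr_failed >= f->nr_retries. The following is a standalone sketch of that bookkeeping, not the real bcachefs code: the types are simplified stand-ins, and MAX_DEVS, should_skip, and main are illustrative names invented here for demonstration.

#include <stdio.h>
#include <assert.h>

#define MAX_DEVS 4	/* stand-in for the real fixed array size */

struct dev_io_failures {
	unsigned dev;
	unsigned nr_failed;	/* reads from this device that failed */
	unsigned nr_retries;	/* retries this device has been granted */
};

struct io_failures {
	unsigned nr;
	struct dev_io_failures devs[MAX_DEVS];
};

/* Linear scan of the small fixed table, as in the patch's dev_io_failures() */
static struct dev_io_failures *dev_io_failures(struct io_failures *f,
					       unsigned dev)
{
	struct dev_io_failures *i;

	for (i = f->devs; i < f->devs + f->nr; i++)
		if (i->dev == dev)
			return i;

	return NULL;
}

/* Mirrors bch2_mark_io_failure(): add an entry on first failure, else bump */
static void mark_io_failure(struct io_failures *failed, unsigned dev)
{
	struct dev_io_failures *f = dev_io_failures(failed, dev);

	if (!f) {
		assert(failed->nr < MAX_DEVS);

		f = &failed->devs[failed->nr++];
		f->dev = dev;
		f->nr_failed = 1;
		f->nr_retries = 0;
	} else {
		f->nr_failed++;
	}
}

/* The skip test extent_pick_read_device() applies to each pointer */
static int should_skip(struct io_failures *failed, unsigned dev)
{
	struct dev_io_failures *f = dev_io_failures(failed, dev);

	return f && f->nr_failed >= f->nr_retries;
}

int main(void)
{
	struct io_failures failed = { 0 };

	/* Suppose the first read from device 2 returned an error: */
	mark_io_failure(&failed, 2);

	/* Device 2 is now skipped; devices with no recorded failure are not: */
	printf("skip dev 2: %d, skip dev 0: %d\n",
	       should_skip(&failed, 2), should_skip(&failed, 0));
	return 0;
}

The design difference from the old devs_mask approach is visible in the comparison: a mask can only say "never read from this device again", whereas the counter pair lets a caller grant extra retries per device by raising nr_retries, so a replica that failed transiently can be tried again once every other replica has also failed.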