From 298533c031beaf1c0479c25fb0119effaf9f65b9 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Wed, 29 Apr 2020 21:18:34 -0400
Subject: Merge with d7dbd2ce12 bcachefs: Add some printks for error paths

---
 fs/bcachefs/bcachefs.h      |   2 +-
 fs/bcachefs/bkey_methods.c  |  95 ++++++++-------
 fs/bcachefs/btree_gc.c      |  10 +-
 fs/bcachefs/btree_io.c      |   2 +-
 fs/bcachefs/btree_locking.h |   3 +-
 fs/bcachefs/checksum.c      |  31 ++---
 fs/bcachefs/checksum.h      |   6 +-
 fs/bcachefs/compress.c      |   2 +-
 fs/bcachefs/extent_update.c |  36 ++++--
 fs/bcachefs/fs-io.c         | 285 +++++---------------------------------------
 fs/bcachefs/fs-io.h         |   4 -
 fs/bcachefs/fs.c            |  13 +-
 fs/bcachefs/io.c            |   4 +-
 fs/bcachefs/move.c          |  19 ++-
 fs/bcachefs/replicas.c      |  17 ++-
 fs/bcachefs/util.h          |  29 -----
 16 files changed, 172 insertions(+), 386 deletions(-)

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 289d7ae4e98c..fa9593764f0c 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -732,7 +732,7 @@ struct bch_fs {
 	ZSTD_parameters		zstd_params;
 
 	struct crypto_shash	*sha256;
-	struct crypto_skcipher	*chacha20;
+	struct crypto_sync_skcipher *chacha20;
 	struct crypto_shash	*poly1305;
 
 	atomic64_t		key_version;
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index c97e1e9002cb..55ef4032b37c 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -283,49 +283,64 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
 	const struct bkey_ops *ops;
 	struct bkey uk;
 	struct bkey_s u;
-
-	if (big_endian != CPU_BIG_ENDIAN)
-		bch2_bkey_swab_key(f, k);
-
-	if (version < bcachefs_metadata_version_bkey_renumber)
-		bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
-
-	if (version < bcachefs_metadata_version_inode_btree_change &&
-	    btree_id == BTREE_ID_INODES) {
+	int i;
+
+	/*
+	 * Do these operations in reverse order in the write path:
+	 */
+
+	for (i = 0; i < 4; i++)
+	switch (!write ? i : 3 - i) {
+	case 0:
+		if (big_endian != CPU_BIG_ENDIAN)
+			bch2_bkey_swab_key(f, k);
+		break;
+	case 1:
+		if (version < bcachefs_metadata_version_bkey_renumber)
+			bch2_bkey_renumber(__btree_node_type(level, btree_id), k, write);
+		break;
+	case 2:
+		if (version < bcachefs_metadata_version_inode_btree_change &&
+		    btree_id == BTREE_ID_INODES) {
+			if (!bkey_packed(k)) {
+				struct bkey_i *u = packed_to_bkey(k);
+				swap(u->k.p.inode, u->k.p.offset);
+			} else if (f->bits_per_field[BKEY_FIELD_INODE] &&
+				   f->bits_per_field[BKEY_FIELD_OFFSET]) {
+				struct bkey_format tmp = *f, *in = f, *out = &tmp;
+
+				swap(tmp.bits_per_field[BKEY_FIELD_INODE],
+				     tmp.bits_per_field[BKEY_FIELD_OFFSET]);
+				swap(tmp.field_offset[BKEY_FIELD_INODE],
+				     tmp.field_offset[BKEY_FIELD_OFFSET]);
+
+				if (!write)
+					swap(in, out);
+
+				uk = __bch2_bkey_unpack_key(in, k);
+				swap(uk.p.inode, uk.p.offset);
+				BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+			}
+		}
+		break;
+	case 3:
 		if (!bkey_packed(k)) {
-			struct bkey_i *u = packed_to_bkey(k);
-			swap(u->k.p.inode, u->k.p.offset);
-		} else if (f->bits_per_field[BKEY_FIELD_INODE] &&
-			   f->bits_per_field[BKEY_FIELD_OFFSET]) {
-			struct bkey_format tmp = *f, *in = f, *out = &tmp;
-
-			swap(tmp.bits_per_field[BKEY_FIELD_INODE],
-			     tmp.bits_per_field[BKEY_FIELD_OFFSET]);
-			swap(tmp.field_offset[BKEY_FIELD_INODE],
-			     tmp.field_offset[BKEY_FIELD_OFFSET]);
-
-			if (!write)
-				swap(in, out);
-
-			uk = __bch2_bkey_unpack_key(in, k);
-			swap(uk.p.inode, uk.p.offset);
-			BUG_ON(!bch2_bkey_pack_key(k, &uk, out));
+			u = bkey_i_to_s(packed_to_bkey(k));
+		} else {
+			uk = __bch2_bkey_unpack_key(f, k);
+			u.k = &uk;
+			u.v = bkeyp_val(f, k);
 		}
-	}
-	if (!bkey_packed(k)) {
-		u = bkey_i_to_s(packed_to_bkey(k));
-	} else {
-		uk = __bch2_bkey_unpack_key(f, k);
-		u.k = &uk;
-		u.v = bkeyp_val(f, k);
-	}
+		if (big_endian != CPU_BIG_ENDIAN)
+			bch2_bkey_swab_val(u);
 
-	if (big_endian != CPU_BIG_ENDIAN)
-		bch2_bkey_swab_val(u);
+		ops = &bch2_bkey_ops[k->type];
 
-	ops = &bch2_bkey_ops[k->type];
-
-	if (ops->compat)
-		ops->compat(btree_id, version, big_endian, write, u);
+		if (ops->compat)
+			ops->compat(btree_id, version, big_endian, write, u);
+		break;
+	default:
+		BUG();
+	}
 }
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index cef8e148f784..146f2428fe04 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -699,8 +699,10 @@ static int bch2_gc_start(struct bch_fs *c,
 
 	c->usage_gc = __alloc_percpu_gfp(fs_usage_u64s(c) * sizeof(u64),
 					 sizeof(u64), GFP_KERNEL);
-	if (!c->usage_gc)
+	if (!c->usage_gc) {
+		bch_err(c, "error allocating c->usage_gc");
 		return -ENOMEM;
+	}
 
 	for_each_member_device(ca, c, i) {
 		BUG_ON(ca->buckets[1]);
@@ -711,19 +713,23 @@ static int bch2_gc_start(struct bch_fs *c,
 					    GFP_KERNEL|__GFP_ZERO);
 		if (!ca->buckets[1]) {
 			percpu_ref_put(&ca->ref);
+			bch_err(c, "error allocating ca->buckets[gc]");
 			return -ENOMEM;
 		}
 
 		ca->usage[1] = alloc_percpu(struct bch_dev_usage);
 		if (!ca->usage[1]) {
+			bch_err(c, "error allocating ca->usage[gc]");
 			percpu_ref_put(&ca->ref);
 			return -ENOMEM;
 		}
 	}
 
 	ret = bch2_ec_mem_alloc(c, true);
-	if (ret)
+	if (ret) {
+		bch_err(c, "error allocating ec gc mem");
 		return ret;
+	}
 
 	percpu_down_write(&c->mark_lock);
 
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 61d4c98d3476..ac8b98861aae 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -620,7 +620,7 @@ static void bset_encrypt(struct bch_fs *c, struct bset *i, unsigned offset)
 		bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce,
 			     &bn->flags, bytes);
 
-		nonce = nonce_add(nonce, round_up(bytes, CHACHA20_BLOCK_SIZE));
+		nonce = nonce_add(nonce, round_up(bytes, CHACHA_BLOCK_SIZE));
 	}
 
 	bch2_encrypt(c, BSET_CSUM_TYPE(i), nonce, i->_data,
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index aaad2d289e79..9081d3fc238a 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -165,8 +165,7 @@ static inline bool btree_node_lock_increment(struct btree_iter *iter,
 	struct btree_iter *linked;
 
 	trans_for_each_iter(iter->trans, linked)
-		if (linked != iter &&
-		    linked->l[level].b == b &&
+		if (linked->l[level].b == b &&
 		    btree_node_locked_type(linked, level) >= want) {
 			six_lock_increment(&b->lock, want);
 			return true;
diff --git a/fs/bcachefs/checksum.c b/fs/bcachefs/checksum.c
index 9ef2cb9b9904..6f1afa4a3119 100644
--- a/fs/bcachefs/checksum.c
+++ b/fs/bcachefs/checksum.c
@@ -10,7 +10,7 @@
 #include <linux/random.h>
 #include <linux/scatterlist.h>
 #include <crypto/algapi.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 #include <crypto/hash.h>
 #include <crypto/poly1305.h>
 #include <crypto/skcipher.h>
@@ -67,21 +67,21 @@ static u64 bch2_checksum_update(unsigned type, u64 crc, const void *data, size_t
 	}
 }
 
-static inline void do_encrypt_sg(struct crypto_skcipher *tfm,
+static inline void do_encrypt_sg(struct crypto_sync_skcipher *tfm,
 				 struct nonce nonce,
 				 struct scatterlist *sg, size_t len)
 {
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 	int ret;
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_crypt(req, sg, sg, len, nonce.d);
 
 	ret = crypto_skcipher_encrypt(req);
 	BUG_ON(ret);
 }
 
-static inline void do_encrypt(struct crypto_skcipher *tfm,
+static inline void do_encrypt(struct crypto_sync_skcipher *tfm,
 			      struct nonce nonce,
 			      void *buf, size_t len)
 {
@@ -94,8 +94,8 @@ static inline void do_encrypt(struct crypto_skcipher *tfm,
 int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
 			    void *buf, size_t len)
 {
-	struct crypto_skcipher *chacha20 =
-		crypto_alloc_skcipher("chacha20", 0, 0);
+	struct crypto_sync_skcipher *chacha20 =
+		crypto_alloc_sync_skcipher("chacha20", 0, 0);
 	int ret;
 
 	if (!chacha20) {
@@ -103,7 +103,8 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
 		return PTR_ERR(chacha20);
 	}
 
-	ret = crypto_skcipher_setkey(chacha20, (void *) key, sizeof(*key));
+	ret = crypto_skcipher_setkey(&chacha20->base,
+				     (void *) key, sizeof(*key));
 	if (ret) {
 		pr_err("crypto_skcipher_setkey() error: %i", ret);
 		goto err;
@@ -111,7 +112,7 @@ int bch2_chacha_encrypt_key(struct bch_key *key, struct nonce nonce,
 	do_encrypt(chacha20, nonce, buf, len);
 err:
-	crypto_free_skcipher(chacha20);
+	crypto_free_sync_skcipher(chacha20);
 	return ret;
 }
 
@@ -198,7 +199,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
 			kunmap_atomic(p);
 		}
 #else
-		__bio_for_each_contig_segment(bv, bio, *iter, *iter)
+		__bio_for_each_bvec(bv, bio, *iter, *iter)
 			crc = bch2_checksum_update(type, crc,
 				page_address(bv.bv_page) + bv.bv_offset,
 				bv.bv_len);
@@ -223,7 +224,7 @@ static struct bch_csum __bch2_checksum_bio(struct bch_fs *c, unsigned type,
 			kunmap_atomic(p);
 		}
 #else
-		__bio_for_each_contig_segment(bv, bio, *iter, *iter)
+		__bio_for_each_bvec(bv, bio, *iter, *iter)
 			crypto_shash_update(desc,
 				page_address(bv.bv_page) + bv.bv_offset,
 				bv.bv_len);
@@ -462,7 +463,7 @@ err:
 static int bch2_alloc_ciphers(struct bch_fs *c)
 {
 	if (!c->chacha20)
-		c->chacha20 = crypto_alloc_skcipher("chacha20", 0, 0);
+		c->chacha20 = crypto_alloc_sync_skcipher("chacha20", 0, 0);
 	if (IS_ERR(c->chacha20)) {
 		bch_err(c, "error requesting chacha20 module: %li",
 			PTR_ERR(c->chacha20));
@@ -545,7 +546,7 @@ int bch2_enable_encryption(struct bch_fs *c, bool keyed)
 			goto err;
 	}
 
-	ret = crypto_skcipher_setkey(c->chacha20,
+	ret = crypto_skcipher_setkey(&c->chacha20->base,
 			(void *) &key.key, sizeof(key.key));
 	if (ret)
 		goto err;
@@ -573,7 +574,7 @@ void bch2_fs_encryption_exit(struct bch_fs *c)
 	if (!IS_ERR_OR_NULL(c->poly1305))
 		crypto_free_shash(c->poly1305);
 	if (!IS_ERR_OR_NULL(c->chacha20))
-		crypto_free_skcipher(c->chacha20);
+		crypto_free_sync_skcipher(c->chacha20);
 	if (!IS_ERR_OR_NULL(c->sha256))
 		crypto_free_shash(c->sha256);
 }
@@ -605,7 +606,7 @@ int bch2_fs_encryption_init(struct bch_fs *c)
 	if (ret)
 		goto out;
 
-	ret = crypto_skcipher_setkey(c->chacha20,
+	ret = crypto_skcipher_setkey(&c->chacha20->base,
 			(void *) &key.key, sizeof(key.key));
 	if (ret)
 		goto out;
diff --git a/fs/bcachefs/checksum.h b/fs/bcachefs/checksum.h
index 833537cc8fd0..24dee8039d57 100644
--- a/fs/bcachefs/checksum.h
+++ b/fs/bcachefs/checksum.h
@@ -7,7 +7,7 @@
 #include "super-io.h"
 
 #include <linux/crc64.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
 
 static inline bool bch2_checksum_mergeable(unsigned type)
 {
@@ -138,9 +138,9 @@ static inline bool bch2_crc_cmp(struct bch_csum l, struct bch_csum r)
 /* for skipping ahead and encrypting/decrypting at an offset: */
 static inline struct nonce nonce_add(struct nonce nonce, unsigned offset)
 {
-	EBUG_ON(offset & (CHACHA20_BLOCK_SIZE - 1));
+	EBUG_ON(offset & (CHACHA_BLOCK_SIZE - 1));
 
-	le32_add_cpu(&nonce.d[0], offset / CHACHA20_BLOCK_SIZE);
+	le32_add_cpu(&nonce.d[0], offset / CHACHA_BLOCK_SIZE);
 	return nonce;
 }
 
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
index 4a163b6d59da..0713286d7999 100644
--- a/fs/bcachefs/compress.c
+++ b/fs/bcachefs/compress.c
@@ -55,7 +55,7 @@ static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
 	BUG_ON(bvec_iter_sectors(start) > c->sb.encoded_extent_max);
 
 #ifndef CONFIG_HIGHMEM
-	__bio_for_each_contig_segment(bv, bio, iter, start) {
+	__bio_for_each_bvec(bv, bio, iter, start) {
 		if (bv.bv_len == start.bi_size)
 			return (struct bbuf) {
 				.b = page_address(bv.bv_page) + bv.bv_offset,
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index 2a7d913bdda3..d0af1bc17018 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -34,16 +34,10 @@ static int count_iters_for_insert(struct btree_trans *trans,
 				  unsigned offset,
 				  struct bpos *end,
 				  unsigned *nr_iters,
-				  unsigned max_iters,
-				  bool overwrite)
+				  unsigned max_iters)
 {
-	int ret = 0;
+	int ret = 0, ret2 = 0;
 
-	/*
-	 * The extent update path requires an _additional_ iterator for each
-	 * extent we're inserting and overwriting:
-	 */
-	*nr_iters += 1;
 	if (*nr_iters >= max_iters) {
 		*end = bpos_min(*end, k.k->p);
 		ret = 1;
@@ -70,11 +64,14 @@ static int count_iters_for_insert(struct btree_trans *trans,
 
 		for_each_btree_key(trans, iter,
 				   BTREE_ID_REFLINK, POS(0, idx + offset),
-				   BTREE_ITER_SLOTS, r_k, ret) {
+				   BTREE_ITER_SLOTS, r_k, ret2) {
 			if (bkey_cmp(bkey_start_pos(r_k.k),
 				     POS(0, idx + sectors)) >= 0)
 				break;
 
+			/* extent_update_to_keys(), for the reflink_v update */
+			*nr_iters += 1;
+
 			*nr_iters += 1 + bch2_bkey_nr_alloc_ptrs(r_k);
 
 			if (*nr_iters >= max_iters) {
@@ -92,7 +89,7 @@
 		}
 	}
 
-	return ret;
+	return ret2 ?: ret;
 }
 
 #define EXTENT_ITERS_MAX	(BTREE_ITER_MAX / 3)
@@ -121,8 +118,11 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 
 	*end = bpos_min(insert->k.p, b->key.k.p);
 
+	/* extent_update_to_keys(): */
+	nr_iters += 1;
+
 	ret = count_iters_for_insert(trans, bkey_i_to_s_c(insert), 0, end,
-				     &nr_iters, EXTENT_ITERS_MAX / 2, false);
+				     &nr_iters, EXTENT_ITERS_MAX / 2);
 	if (ret < 0)
 		return ret;
 
@@ -139,8 +139,20 @@ int bch2_extent_atomic_end(struct btree_iter *iter,
 			offset = bkey_start_offset(&insert->k) -
 				bkey_start_offset(k.k);
 
+		/* extent_handle_overwrites(): */
+		switch (bch2_extent_overlap(&insert->k, k.k)) {
+		case BCH_EXTENT_OVERLAP_ALL:
+		case BCH_EXTENT_OVERLAP_FRONT:
+			nr_iters += 1;
+			break;
+		case BCH_EXTENT_OVERLAP_BACK:
+		case BCH_EXTENT_OVERLAP_MIDDLE:
+			nr_iters += 2;
+			break;
+		}
+
 		ret = count_iters_for_insert(trans, k, offset, end,
-					     &nr_iters, EXTENT_ITERS_MAX, true);
+					     &nr_iters, EXTENT_ITERS_MAX);
 		if (ret)
 			break;
 
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 707ce27eace6..7de61f7f4314 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -602,7 +602,7 @@ int bch2_migrate_page(struct address_space *mapping, struct page *newpage,
 	EBUG_ON(!PageLocked(page));
 	EBUG_ON(!PageLocked(newpage));
 
-	ret = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+	ret = migrate_page_move_mapping(mapping, newpage, page, 0);
 	if (ret != MIGRATEPAGE_SUCCESS)
 		return ret;
 
@@ -627,10 +627,10 @@
 
 static void bch2_readpages_end_io(struct bio *bio)
 {
+	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned i;
 
-	bio_for_each_segment_all(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, iter) {
 		struct page *page = bv->bv_page;
 
 		if (!bio->bi_status) {
@@ -782,11 +782,8 @@ static void readpage_bio_extend(struct readpages_iter *iter,
 		if (!get_more)
 			break;
 
-		rcu_read_lock();
-		page = radix_tree_lookup(&iter->mapping->i_pages, page_offset);
-		rcu_read_unlock();
-
-		if (page && !radix_tree_exceptional_entry(page))
+		page = xa_load(&iter->mapping->i_pages, page_offset);
+		if (page && !xa_is_value(page))
 			break;
 
 		page = __page_cache_alloc(readahead_gfp_mask(iter->mapping));
@@ -1037,32 +1034,33 @@ static void bch2_writepage_io_done(struct closure *cl)
 					struct bch_writepage_io, cl);
 	struct bch_fs *c = io->op.c;
 	struct bio *bio = &io->op.wbio.bio;
+	struct bvec_iter_all iter;
 	struct bio_vec *bvec;
-	unsigned i, j;
+	unsigned i;
 
 	if (io->op.error) {
-		bio_for_each_segment_all(bvec, bio, i) {
+		bio_for_each_segment_all(bvec, bio, iter) {
 			struct bch_page_state *s;
 
 			SetPageError(bvec->bv_page);
-			mapping_set_error(io->inode->v.i_mapping, -EIO);
+			mapping_set_error(bvec->bv_page->mapping, -EIO);
 
 			s = __bch2_page_state(bvec->bv_page);
 			spin_lock(&s->lock);
-			for (j = 0; j < PAGE_SECTORS; j++)
-				s->s[j].nr_replicas = 0;
+			for (i = 0; i < PAGE_SECTORS; i++)
+				s->s[i].nr_replicas = 0;
 			spin_unlock(&s->lock);
 		}
 	}
 
 	if (io->op.flags & BCH_WRITE_WROTE_DATA_INLINE) {
-		bio_for_each_segment_all(bvec, bio, i) {
+		bio_for_each_segment_all(bvec, bio, iter) {
 			struct bch_page_state *s;
 
 			s = __bch2_page_state(bvec->bv_page);
 			spin_lock(&s->lock);
-			for (j = 0; j < PAGE_SECTORS; j++)
-				s->s[j].nr_replicas = 0;
+			for (i = 0; i < PAGE_SECTORS; i++)
+				s->s[i].nr_replicas = 0;
 			spin_unlock(&s->lock);
 		}
 	}
@@ -1086,7 +1084,7 @@ static void bch2_writepage_io_done(struct closure *cl)
 	 */
 	i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
 
-	bio_for_each_segment_all(bvec, bio, i) {
+	bio_for_each_segment_all(bvec, bio, iter) {
 		struct bch_page_state *s = __bch2_page_state(bvec->bv_page);
 
 		if (atomic_dec_and_test(&s->write_count))
@@ -1240,8 +1238,9 @@ do_io:
 
 	if (w->io &&
 	    (w->io->op.res.nr_replicas != nr_replicas_this_write ||
-	     bio_full(&w->io->op.wbio.bio) ||
-	     w->io->op.wbio.bio.bi_iter.bi_size >= (256U << 20) ||
+	     bio_full(&w->io->op.wbio.bio, PAGE_SIZE) ||
+	     w->io->op.wbio.bio.bi_iter.bi_size + (sectors << 9) >=
+	     (BIO_MAX_PAGES * PAGE_SIZE) ||
 	     bio_end_sector(&w->io->op.wbio.bio) != sector))
 		bch2_writepage_do_io(w);
 
@@ -1805,8 +1804,9 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 	struct address_space *mapping = req->ki_filp->f_mapping;
 	struct bch_inode_info *inode = file_bch_inode(req->ki_filp);
 	struct bio *bio = &dio->op.wbio.bio;
+	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned i, unaligned;
+	unsigned unaligned;
 	u64 new_i_size;
 	bool sync = dio->sync;
 	long ret;
@@ -1815,12 +1815,22 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 		goto loop;
 
 	while (1) {
+		size_t extra = dio->iter.count -
+			min(BIO_MAX_PAGES * PAGE_SIZE, dio->iter.count);
+
 		if (kthread)
 			use_mm(dio->mm);
 		BUG_ON(current->faults_disabled_mapping);
 		current->faults_disabled_mapping = mapping;
 
+		/*
+		 * Don't issue more than 2MB at once, the bcachefs io path in
+		 * io.c can't bounce more than that:
+		 */
+
+		dio->iter.count -= extra;
 		ret = bio_iov_iter_get_pages(bio, &dio->iter);
+		dio->iter.count += extra;
 
 		current->faults_disabled_mapping = NULL;
 		if (kthread)
@@ -1838,7 +1848,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 			 * bio_iov_iter_get_pages was only able to get <
 			 * blocksize worth of pages:
 			 */
-			bio_for_each_segment_all(bv, bio, i)
+			bio_for_each_segment_all(bv, bio, iter)
 				put_page(bv->bv_page);
 			ret = -EFAULT;
 			goto err;
@@ -1886,7 +1896,7 @@ loop:
 			i_size_write(&inode->v, new_i_size);
 		spin_unlock(&inode->v.i_lock);
 
-		bio_for_each_segment_all(bv, bio, i)
+		bio_for_each_segment_all(bv, bio, iter)
 			put_page(bv->bv_page);
 		if (!dio->iter.count || dio->op.error)
 			break;
@@ -2823,235 +2833,6 @@ static void mark_range_unallocated(struct bch_inode_info *inode,
 	} while (index <= end_index);
 }
 
-static int generic_access_check_limits(struct file *file, loff_t pos,
-				       loff_t *count)
-{
-	struct inode *inode = file->f_mapping->host;
-	loff_t max_size = inode->i_sb->s_maxbytes;
-
-	if (!(file->f_flags & O_LARGEFILE))
-		max_size = MAX_NON_LFS;
-
-	if (unlikely(pos >= max_size))
-		return -EFBIG;
-	*count = min(*count, max_size - pos);
-	return 0;
-}
-
-static int generic_write_check_limits(struct file *file, loff_t pos,
-				      loff_t *count)
-{
-	loff_t limit = rlimit(RLIMIT_FSIZE);
-
-	if (limit != RLIM_INFINITY) {
-		if (pos >= limit) {
-			send_sig(SIGXFSZ, current, 0);
-			return -EFBIG;
-		}
-		*count = min(*count, limit - pos);
-	}
-
-	return generic_access_check_limits(file, pos, count);
-}
-
-static int generic_remap_checks(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				loff_t *req_count, unsigned int remap_flags)
-{
-	struct inode *inode_in = file_in->f_mapping->host;
-	struct inode *inode_out = file_out->f_mapping->host;
-	uint64_t count = *req_count;
-	uint64_t bcount;
-	loff_t size_in, size_out;
-	loff_t bs = inode_out->i_sb->s_blocksize;
-	int ret;
-
-	/* The start of both ranges must be aligned to an fs block. */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
-		return -EINVAL;
-
-	/* Ensure offsets don't wrap. */
-	if (pos_in + count < pos_in || pos_out + count < pos_out)
-		return -EINVAL;
-
-	size_in = i_size_read(inode_in);
-	size_out = i_size_read(inode_out);
-
-	/* Dedupe requires both ranges to be within EOF. */
-	if ((remap_flags & REMAP_FILE_DEDUP) &&
-	    (pos_in >= size_in || pos_in + count > size_in ||
-	     pos_out >= size_out || pos_out + count > size_out))
-		return -EINVAL;
-
-	/* Ensure the infile range is within the infile. */
-	if (pos_in >= size_in)
-		return -EINVAL;
-	count = min(count, size_in - (uint64_t)pos_in);
-
-	ret = generic_access_check_limits(file_in, pos_in, &count);
-	if (ret)
-		return ret;
-
-	ret = generic_write_check_limits(file_out, pos_out, &count);
-	if (ret)
-		return ret;
-
-	/*
-	 * If the user wanted us to link to the infile's EOF, round up to the
-	 * next block boundary for this check.
-	 *
-	 * Otherwise, make sure the count is also block-aligned, having
-	 * already confirmed the starting offsets' block alignment.
-	 */
-	if (pos_in + count == size_in) {
-		bcount = ALIGN(size_in, bs) - pos_in;
-	} else {
-		if (!IS_ALIGNED(count, bs))
-			count = ALIGN_DOWN(count, bs);
-		bcount = count;
-	}
-
-	/* Don't allow overlapped cloning within the same file. */
-	if (inode_in == inode_out &&
-	    pos_out + bcount > pos_in &&
-	    pos_out < pos_in + bcount)
-		return -EINVAL;
-
-	/*
-	 * We shortened the request but the caller can't deal with that, so
-	 * bounce the request back to userspace.
-	 */
-	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
-		return -EINVAL;
-
-	*req_count = count;
-	return 0;
-}
-
-static int generic_remap_check_len(struct inode *inode_in,
-				   struct inode *inode_out,
-				   loff_t pos_out,
-				   loff_t *len,
-				   unsigned int remap_flags)
-{
-	u64 blkmask = i_blocksize(inode_in) - 1;
-	loff_t new_len = *len;
-
-	if ((*len & blkmask) == 0)
-		return 0;
-
-	if ((remap_flags & REMAP_FILE_DEDUP) ||
-	    pos_out + *len < i_size_read(inode_out))
-		new_len &= ~blkmask;
-
-	if (new_len == *len)
-		return 0;
-
-	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
-		*len = new_len;
-		return 0;
-	}
-
-	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
-}
-
-static int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
-					 struct file *file_out, loff_t pos_out,
-					 loff_t *len, unsigned int remap_flags)
-{
-	struct inode *inode_in = file_inode(file_in);
-	struct inode *inode_out = file_inode(file_out);
-	bool same_inode = (inode_in == inode_out);
-	int ret;
-
-	/* Don't touch certain kinds of inodes */
-	if (IS_IMMUTABLE(inode_out))
-		return -EPERM;
-
-	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
-		return -ETXTBSY;
-
-	/* Don't reflink dirs, pipes, sockets... */
-	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-		return -EISDIR;
-	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-		return -EINVAL;
-
-	/* Zero length dedupe exits immediately; reflink goes to EOF. */
-	if (*len == 0) {
-		loff_t isize = i_size_read(inode_in);
-
-		if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
-			return 0;
-		if (pos_in > isize)
-			return -EINVAL;
-		*len = isize - pos_in;
-		if (*len == 0)
-			return 0;
-	}
-
-	/* Check that we don't violate system file offset limits. */
-	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-			remap_flags);
-	if (ret)
-		return ret;
-
-	/* Wait for the completion of any pending IOs on both files */
-	inode_dio_wait(inode_in);
-	if (!same_inode)
-		inode_dio_wait(inode_out);
-
-	ret = filemap_write_and_wait_range(inode_in->i_mapping,
-			pos_in, pos_in + *len - 1);
-	if (ret)
-		return ret;
-
-	ret = filemap_write_and_wait_range(inode_out->i_mapping,
-			pos_out, pos_out + *len - 1);
-	if (ret)
-		return ret;
-
-	/*
-	 * Check that the extents are the same.
-	 */
-	if (remap_flags & REMAP_FILE_DEDUP) {
-		bool is_same = false;
-
-		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
-				inode_out, pos_out, *len, &is_same);
-		if (ret)
-			return ret;
-		if (!is_same)
-			return -EBADE;
-	}
-
-	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
-			remap_flags);
-	if (ret)
-		return ret;
-
-	/* If can't alter the file contents, we're done. */
-	if (!(remap_flags & REMAP_FILE_DEDUP)) {
-		/* Update the timestamps, since we can alter file contents. */
-		if (!(file_out->f_mode & FMODE_NOCMTIME)) {
-			ret = file_update_time(file_out);
-			if (ret)
-				return ret;
-		}
-
-		/*
-		 * Clear the security bits if the process is not being run by
-		 * root. This keeps people from modifying setuid and setgid
-		 * binaries.
-		 */
-		ret = file_remove_privs(file_out);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
 			     struct file *file_dst, loff_t pos_dst,
 			     loff_t len, unsigned remap_flags)
@@ -3244,7 +3025,7 @@ static loff_t page_hole_offset(struct address_space *mapping, loff_t offset)
 	loff_t ret = -1;
 
 	page = find_lock_entry(mapping, index);
-	if (!page || radix_tree_exception(page))
+	if (!page || xa_is_value(page))
 		return offset;
 
 	pg_offset = __page_hole_offset(page, offset & (PAGE_SIZE - 1));
diff --git a/fs/bcachefs/fs-io.h b/fs/bcachefs/fs-io.h
index 1b593ea707d5..7063556d289b 100644
--- a/fs/bcachefs/fs-io.h
+++ b/fs/bcachefs/fs-io.h
@@ -35,10 +35,6 @@ int bch2_fsync(struct file *, loff_t, loff_t, int);
 int bch2_truncate(struct bch_inode_info *, struct iattr *);
 long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
 
-#define REMAP_FILE_ADVISORY		(0)
-#define REMAP_FILE_DEDUP		(1 << 0)
-#define REMAP_FILE_CAN_SHORTEN		(1 << 1)
-
 loff_t bch2_remap_file_range(struct file *, loff_t, struct file *, loff_t,
 			     loff_t, unsigned);
 
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index b1445bdc3e9d..1c89a1b2c2d0 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -966,15 +966,6 @@ static int bch2_vfs_readdir(struct file *file, struct dir_context *ctx)
 	return bch2_readdir(c, inode->v.i_ino, ctx);
 }
 
-static int bch2_clone_file_range(struct file *file_src, loff_t pos_src,
-				 struct file *file_dst, loff_t pos_dst,
-				 u64 len)
-{
-	return bch2_remap_file_range(file_src, pos_src,
-				     file_dst, pos_dst,
-				     len, 0);
-}
-
 static const struct file_operations bch_file_operations = {
 	.llseek		= bch2_llseek,
 	.read_iter	= bch2_read_iter,
@@ -992,7 +983,7 @@ static const struct file_operations bch_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= bch2_compat_fs_ioctl,
 #endif
-	.clone_file_range = bch2_clone_file_range,
+	.remap_file_range = bch2_remap_file_range,
 };
 
 static const struct inode_operations bch_file_inode_operations = {
@@ -1523,7 +1514,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
 
 	sb->s_bdi->congested_fn		= bch2_congested;
 	sb->s_bdi->congested_data	= c;
-	sb->s_bdi->ra_pages		= VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
+	sb->s_bdi->ra_pages		= VM_READAHEAD_PAGES;
 
 	for_each_online_member(ca, c, i) {
 		struct block_device *bdev = ca->disk_sb.bdev;
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index ff3dd31c134c..19059702428a 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -124,10 +124,10 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw)
 
 void bch2_bio_free_pages_pool(struct bch_fs *c, struct bio *bio)
 {
+	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned i;
 
-	bio_for_each_segment_all(bv, bio, i)
+	bio_for_each_segment_all(bv, bio, iter)
 		if (bv->bv_page != ZERO_PAGE(0))
 			mempool_free(bv->bv_page, &c->bio_bounce_pages);
 	bio->bi_vcnt = 0;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 728c7554779f..67e495bc8aba 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -70,19 +70,26 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
 			       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
 	while (1) {
-		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
+		struct bkey_s_c k;
 		struct bkey_i *insert;
-		struct bkey_i_extent *new =
-			bkey_i_to_extent(bch2_keylist_front(keys));
+		struct bkey_i_extent *new;
 		BKEY_PADDED(k) _new, _insert;
 		const union bch_extent_entry *entry;
 		struct extent_ptr_decoded p;
 		bool did_work = false;
 		int nr;
 
+		bch2_trans_reset(&trans, 0);
+
+		k = bch2_btree_iter_peek_slot(iter);
 		ret = bkey_err(k);
-		if (ret)
+		if (ret) {
+			if (ret == -EINTR)
+				continue;
 			break;
+		}
+
+		new = bkey_i_to_extent(bch2_keylist_front(keys));
 
 		if (bversion_cmp(k.k->version, new->k.version) ||
 		    !bch2_bkey_matches_ptr(c, k, m->ptr, m->offset))
@@ -306,12 +313,12 @@
 static void move_free(struct closure *cl)
 {
 	struct moving_io *io = container_of(cl, struct moving_io, cl);
 	struct moving_context *ctxt = io->write.ctxt;
+	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned i;
 
 	bch2_disk_reservation_put(io->write.op.c, &io->write.op.res);
 
-	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, i)
+	bio_for_each_segment_all(bv, &io->write.op.wbio.bio, iter)
 		if (bv->bv_page)
 			__free_page(bv->bv_page);
 
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index be4908575f72..67a7128fd9af 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -299,8 +299,10 @@ static int replicas_table_update(struct bch_fs *c,
 					       GFP_NOIO)) ||
 	    !(new_scratch  = kmalloc(bytes, GFP_NOIO)) ||
 	    (c->usage_gc &&
-	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO))))
+	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_NOIO)))) {
+		bch_err(c, "error updating replicas table: memory allocation failure");
 		goto err;
+	}
 
 	if (c->usage_base)
 		__replicas_table_update(new_base, new_r,
@@ -362,7 +364,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 				struct bch_replicas_entry *new_entry)
 {
 	struct bch_replicas_cpu new_r, new_gc;
-	int ret = -ENOMEM;
+	int ret = 0;
 
 	verify_replicas_entry(new_entry);
 
@@ -409,14 +411,16 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 		swap(new_gc, c->replicas_gc);
 
 	percpu_up_write(&c->mark_lock);
 out:
-	ret = 0;
-err:
 	mutex_unlock(&c->sb_lock);
 
 	kfree(new_r.entries);
 	kfree(new_gc.entries);
 
 	return ret;
+err:
+	bch_err(c, "error adding replicas entry: memory allocation failure");
+	ret = -ENOMEM;
+	goto out;
 }
 
 int bch2_mark_replicas(struct bch_fs *c,
@@ -561,6 +565,7 @@ int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
 					 GFP_NOIO);
 	if (!c->replicas_gc.entries) {
 		mutex_unlock(&c->sb_lock);
+		bch_err(c, "error allocating c->replicas_gc");
 		return -ENOMEM;
 	}
 
@@ -586,8 +591,10 @@ retry:
 	nr		= READ_ONCE(c->replicas.nr);
 	new.entry_size	= READ_ONCE(c->replicas.entry_size);
 	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
-	if (!new.entries)
+	if (!new.entries) {
+		bch_err(c, "error allocating c->replicas_gc");
 		return -ENOMEM;
+	}
 
 	mutex_lock(&c->sb_lock);
 	percpu_down_write(&c->mark_lock);
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 2b19a0038045..0128daba5970 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -664,35 +664,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
 	memset(s + bytes, c, rem);
 }
 
-static inline struct bio_vec next_contig_bvec(struct bio *bio,
-					      struct bvec_iter *iter)
-{
-	struct bio_vec bv = bio_iter_iovec(bio, *iter);
-
-	bio_advance_iter(bio, iter, bv.bv_len);
-#ifndef CONFIG_HIGHMEM
-	while (iter->bi_size) {
-		struct bio_vec next = bio_iter_iovec(bio, *iter);
-
-		if (page_address(bv.bv_page) + bv.bv_offset + bv.bv_len !=
-		    page_address(next.bv_page) + next.bv_offset)
-			break;
-
-		bv.bv_len += next.bv_len;
-		bio_advance_iter(bio, iter, next.bv_len);
-	}
-#endif
-	return bv;
-}
-
-#define __bio_for_each_contig_segment(bv, bio, iter, start)		\
-	for (iter = (start);						\
-	     (iter).bi_size &&						\
-		((bv = next_contig_bvec((bio), &(iter))), 1);)
-
-#define bio_for_each_contig_segment(bv, bio, iter)			\
-	__bio_for_each_contig_segment(bv, bio, iter, (bio)->bi_iter)
-
 void sort_cmp_size(void *base, size_t num, size_t size,
 		   int (*cmp_func)(const void *, const void *, size_t),
 		   void (*swap_func)(void *, void *, size_t));
-- 
cgit v1.2.3