diff options
author | James Morris <james.l.morris@oracle.com> | 2017-11-29 12:47:41 +1100 |
---|---|---|
committer | James Morris <james.l.morris@oracle.com> | 2017-11-29 12:47:41 +1100 |
commit | cf40a76e7d5874bb25f4404eecc58a2e033af885 (patch) | |
tree | 8fd81cbea03c87b3d41d7ae5b1d11eadd35d6ef5 /fs/btrfs/disk-io.c | |
parent | ab5348c9c23cd253f5902980d2d8fe067dc24c82 (diff) | |
parent | 4fbd8d194f06c8a3fd2af1ce560ddb31f7ec8323 (diff) |
Merge tag 'v4.15-rc1' into next-seccomp
Linux 4.15-rc1
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 496 |
1 files changed, 147 insertions, 349 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 080e2ebb8aa0..efce9a2fa9be 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -50,6 +50,8 @@ #include "sysfs.h" #include "qgroup.h" #include "compression.h" +#include "tree-checker.h" +#include "ref-verify.h" #ifdef CONFIG_X86 #include <asm/cpufeature.h> @@ -529,7 +531,7 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - u8 fsid[BTRFS_UUID_SIZE]; + u8 fsid[BTRFS_FSID_SIZE]; int ret = 1; read_extent_buffer(eb, fsid, btrfs_header_fsid(), BTRFS_FSID_SIZE); @@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, return ret; } -#define CORRUPT(reason, eb, root, slot) \ - btrfs_crit(root->fs_info, \ - "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ - btrfs_header_level(eb) == 0 ? "leaf" : "node", \ - reason, btrfs_header_bytenr(eb), root->objectid, slot) - -static noinline int check_leaf(struct btrfs_root *root, - struct extent_buffer *leaf) -{ - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_key key; - struct btrfs_key leaf_key; - u32 nritems = btrfs_header_nritems(leaf); - int slot; - - /* - * Extent buffers from a relocation tree have a owner field that - * corresponds to the subvolume tree they are based on. So just from an - * extent buffer alone we can not find out what is the id of the - * corresponding subvolume tree, so we can not figure out if the extent - * buffer corresponds to the root of the relocation tree or not. So skip - * this check for relocation trees. - */ - if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { - struct btrfs_root *check_root; - - key.objectid = btrfs_header_owner(leaf); - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = (u64)-1; - - check_root = btrfs_get_fs_root(fs_info, &key, false); - /* - * The only reason we also check NULL here is that during - * open_ctree() some roots has not yet been set up. - */ - if (!IS_ERR_OR_NULL(check_root)) { - struct extent_buffer *eb; - - eb = btrfs_root_node(check_root); - /* if leaf is the root, then it's fine */ - if (leaf != eb) { - CORRUPT("non-root leaf's nritems is 0", - leaf, check_root, 0); - free_extent_buffer(eb); - return -EIO; - } - free_extent_buffer(eb); - } - return 0; - } - - if (nritems == 0) - return 0; - - /* Check the 0 item */ - if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("invalid item offset size pair", leaf, root, 0); - return -EIO; - } - - /* - * Check to make sure each items keys are in the correct order and their - * offsets make sense. We only have to loop through nritems-1 because - * we check the current slot against the next slot, which verifies the - * next slot's offset+size makes sense and that the current's slot - * offset is correct. - */ - for (slot = 0; slot < nritems - 1; slot++) { - btrfs_item_key_to_cpu(leaf, &leaf_key, slot); - btrfs_item_key_to_cpu(leaf, &key, slot + 1); - - /* Make sure the keys are in the right order */ - if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { - CORRUPT("bad key order", leaf, root, slot); - return -EIO; - } - - /* - * Make sure the offset and ends are right, remember that the - * item data starts at the end of the leaf and grows towards the - * front. - */ - if (btrfs_item_offset_nr(leaf, slot) != - btrfs_item_end_nr(leaf, slot + 1)) { - CORRUPT("slot offset bad", leaf, root, slot); - return -EIO; - } - - /* - * Check to make sure that we don't point outside of the leaf, - * just in case all the items are consistent to each other, but - * all point outside of the leaf. - */ - if (btrfs_item_end_nr(leaf, slot) > - BTRFS_LEAF_DATA_SIZE(fs_info)) { - CORRUPT("slot end outside of leaf", leaf, root, slot); - return -EIO; - } - } - - return 0; -} - -static int check_node(struct btrfs_root *root, struct extent_buffer *node) -{ - unsigned long nr = btrfs_header_nritems(node); - struct btrfs_key key, next_key; - int slot; - u64 bytenr; - int ret = 0; - - if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { - btrfs_crit(root->fs_info, - "corrupt node: block %llu root %llu nritems %lu", - node->start, root->objectid, nr); - return -EIO; - } - - for (slot = 0; slot < nr - 1; slot++) { - bytenr = btrfs_node_blockptr(node, slot); - btrfs_node_key_to_cpu(node, &key, slot); - btrfs_node_key_to_cpu(node, &next_key, slot + 1); - - if (!bytenr) { - CORRUPT("invalid item slot", node, root, slot); - ret = -EIO; - goto out; - } - - if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { - CORRUPT("bad key order", node, root, slot); - ret = -EIO; - goto out; - } - } -out: - return ret; -} - static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, * that we don't try and read the other copies of this block, just * return -EIO. */ - if (found_level == 0 && check_leaf(root, eb)) { + if (found_level == 0 && btrfs_check_leaf(root, eb)) { set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = -EIO; } - if (found_level > 0 && check_node(root, eb)) + if (found_level > 0 && btrfs_check_node(root, eb)) ret = -EIO; if (!ret) @@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work) static void run_one_async_done(struct btrfs_work *work) { - struct btrfs_fs_info *fs_info; struct async_submit_bio *async; - int limit; async = container_of(work, struct async_submit_bio, work); - fs_info = async->fs_info; - - limit = btrfs_async_submit_limit(fs_info); - limit = limit * 2 / 3; - - /* - * atomic_dec_return implies a barrier for waitqueue_active - */ - if (atomic_dec_return(&fs_info->nr_async_submits) < limit && - waitqueue_active(&fs_info->async_submit_wait)) - wake_up(&fs_info->async_submit_wait); /* If an error occurred we just want to clean up the bio and move on */ if (async->status) { @@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, async->status = 0; - atomic_inc(&fs_info->nr_async_submits); - if (op_is_sync(bio->bi_opf)) btrfs_set_work_high_priority(&async->work); btrfs_queue_work(fs_info->workers, &async->work); - - while (atomic_read(&fs_info->async_submit_draining) && - atomic_read(&fs_info->nr_async_submits)) { - wait_event(fs_info->async_submit_wait, - (atomic_read(&fs_info->nr_async_submits) == 0)); - } - return 0; } @@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio, return ret; } -static int check_async_write(unsigned long bio_flags) +static int check_async_write(struct btrfs_inode *bi) { - if (bio_flags & EXTENT_BIO_TREE_LOG) + if (atomic_read(&bi->sync_writers)) return 0; #ifdef CONFIG_X86 if (static_cpu_has(X86_FEATURE_XMM4_2)) @@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, { struct inode *inode = private_data; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - int async = check_async_write(bio_flags); + int async = check_async_write(BTRFS_I(inode)); blk_status_t ret; if (bio_op(bio) != REQ_OP_WRITE) { @@ -1343,7 +1183,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->log_batch, 0); atomic_set(&root->orphan_inodes, 0); refcount_set(&root->refs, 1); - atomic_set(&root->will_be_snapshoted, 0); + atomic_set(&root->will_be_snapshotted, 0); atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; @@ -2478,7 +2318,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, return ret; } - if (fs_info->sb->s_flags & MS_RDONLY) { + if (sb_rdonly(fs_info->sb)) { ret = btrfs_commit_super(fs_info); if (ret) return ret; @@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb, goto fail_delalloc_bytes; } - fs_info->btree_inode = new_inode(sb); - if (!fs_info->btree_inode) { - err = -ENOMEM; - goto fail_bio_counter; - } - - mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); - INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); @@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb, btrfs_mapping_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); - btrfs_init_block_rsv(&fs_info->delalloc_block_rsv, - BTRFS_BLOCK_RSV_DELALLOC); btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); btrfs_init_block_rsv(&fs_info->delayed_block_rsv, BTRFS_BLOCK_RSV_DELOPS); - atomic_set(&fs_info->nr_async_submits, 0); atomic_set(&fs_info->async_delalloc_pages, 0); - atomic_set(&fs_info->async_submit_draining, 0); - atomic_set(&fs_info->nr_async_bios, 0); atomic_set(&fs_info->defrag_running, 0); atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); @@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb, /* readahead state */ INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); spin_lock_init(&fs_info->reada_lock); + btrfs_init_ref_verify(fs_info); fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); INIT_LIST_HEAD(&fs_info->ordered_roots); spin_lock_init(&fs_info->ordered_root_lock); + + fs_info->btree_inode = new_inode(sb); + if (!fs_info->btree_inode) { + err = -ENOMEM; + goto fail_bio_counter; + } + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); + fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), GFP_KERNEL); if (!fs_info->delayed_root) { @@ -2694,8 +2530,8 @@ int open_ctree(struct super_block *sb, btrfs_init_balance(fs_info); btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work); - sb->s_blocksize = 4096; - sb->s_blocksize_bits = blksize_bits(4096); + sb->s_blocksize = BTRFS_BDEV_BLOCKSIZE; + sb->s_blocksize_bits = blksize_bits(BTRFS_BDEV_BLOCKSIZE); btrfs_init_btree_inode(fs_info); @@ -2828,6 +2664,8 @@ int open_ctree(struct super_block *sb, features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; if (fs_info->compress_type == BTRFS_COMPRESS_LZO) features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; + else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) btrfs_info(fs_info, "has skinny extents"); @@ -2874,7 +2712,7 @@ int open_ctree(struct super_block *sb, features = btrfs_super_compat_ro_flags(disk_super) & ~BTRFS_FEATURE_COMPAT_RO_SUPP; - if (!(sb->s_flags & MS_RDONLY) && features) { + if (!sb_rdonly(sb) && features) { btrfs_err(fs_info, "cannot mount read-write because of unsupported optional features (%llx)", features); @@ -2893,12 +2731,13 @@ int open_ctree(struct super_block *sb, sb->s_bdi->congested_fn = btrfs_congested_fn; sb->s_bdi->congested_data = fs_info; sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; - sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; + sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE; sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); sb->s_blocksize = sectorsize; sb->s_blocksize_bits = blksize_bits(sectorsize); + memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE); mutex_lock(&fs_info->chunk_mutex); ret = btrfs_read_sys_array(fs_info); @@ -3035,15 +2874,10 @@ retry_root_backup: btrfs_err(fs_info, "failed to read block groups: %d", ret); goto fail_sysfs; } - fs_info->num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - if (fs_info->fs_devices->missing_devices > - fs_info->num_tolerated_disk_barrier_failures && - !(sb->s_flags & MS_RDONLY)) { + + if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { btrfs_warn(fs_info, -"missing devices (%llu) exceeds the limit (%d), writeable mount is not allowed", - fs_info->fs_devices->missing_devices, - fs_info->num_tolerated_disk_barrier_failures); + "writeable mount is not allowed due to too many missing devices"); goto fail_sysfs; } @@ -3058,11 +2892,9 @@ retry_root_backup: if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; - if (!btrfs_test_opt(fs_info, SSD) && - !btrfs_test_opt(fs_info, NOSSD) && + if (!btrfs_test_opt(fs_info, NOSSD) && !fs_info->fs_devices->rotating) { - btrfs_info(fs_info, "detected SSD devices, enabling SSD mode"); - btrfs_set_opt(fs_info->mount_opt, SSD); + btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations"); } /* @@ -3088,6 +2920,9 @@ retry_root_backup: if (ret) goto fail_trans_kthread; + if (btrfs_build_ref_tree(fs_info)) + btrfs_err(fs_info, "couldn't build ref tree"); + /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { @@ -3102,7 +2937,7 @@ retry_root_backup: if (ret) goto fail_qgroup; - if (!(sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(sb)) { ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto fail_qgroup; @@ -3128,7 +2963,7 @@ retry_root_backup: goto fail_qgroup; } - if (sb->s_flags & MS_RDONLY) + if (sb_rdonly(sb)) return 0; if (btrfs_test_opt(fs_info, CLEAR_CACHE) && @@ -3321,7 +3156,7 @@ int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num, if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode)) return -EINVAL; - bh = __bread(bdev, bytenr / 4096, BTRFS_SUPER_INFO_SIZE); + bh = __bread(bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, BTRFS_SUPER_INFO_SIZE); /* * If we fail to read from the underlying devices, as of now * the best option we have is to mark it EIO. @@ -3378,19 +3213,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) } /* - * this should be called twice, once with wait == 0 and - * once with wait == 1. When wait == 0 is done, all the buffer heads - * we write are pinned. + * Write superblock @sb to the @device. Do not wait for completion, all the + * buffer heads we write are pinned. * - * They are released when wait == 1 is done. - * max_mirrors must be the same for both runs, and it indicates how - * many supers on this one device should be written. + * Write @max_mirrors copies of the superblock, where 0 means default that fit + * the expected device size at commit time. Note that max_mirrors must be + * same for write and wait phases. * - * max_mirrors == 0 means to write them all. + * Return number of errors when buffer head is not found or submission fails. */ static int write_dev_supers(struct btrfs_device *device, - struct btrfs_super_block *sb, - int wait, int max_mirrors) + struct btrfs_super_block *sb, int max_mirrors) { struct buffer_head *bh; int i; @@ -3408,57 +3241,33 @@ static int write_dev_supers(struct btrfs_device *device, device->commit_total_bytes) break; - if (wait) { - bh = __find_get_block(device->bdev, bytenr / 4096, - BTRFS_SUPER_INFO_SIZE); - if (!bh) { - errors++; - continue; - } - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - errors++; + btrfs_set_super_bytenr(sb, bytenr); - /* drop our reference */ - brelse(bh); + crc = ~(u32)0; + crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, sb->csum); - /* drop the reference from the wait == 0 run */ - brelse(bh); + /* One reference for us, and we leave it for the caller */ + bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, + BTRFS_SUPER_INFO_SIZE); + if (!bh) { + btrfs_err(device->fs_info, + "couldn't get super buffer head for bytenr %llu", + bytenr); + errors++; continue; - } else { - btrfs_set_super_bytenr(sb, bytenr); - - crc = ~(u32)0; - crc = btrfs_csum_data((const char *)sb + - BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, sb->csum); - - /* - * one reference for us, and we leave it for the - * caller - */ - bh = __getblk(device->bdev, bytenr / 4096, - BTRFS_SUPER_INFO_SIZE); - if (!bh) { - btrfs_err(device->fs_info, - "couldn't get super buffer head for bytenr %llu", - bytenr); - errors++; - continue; - } + } - memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); + memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE); - /* one reference for submit_bh */ - get_bh(bh); + /* one reference for submit_bh */ + get_bh(bh); - set_buffer_uptodate(bh); - lock_buffer(bh); - bh->b_end_io = btrfs_end_buffer_write_sync; - bh->b_private = device; - } + set_buffer_uptodate(bh); + lock_buffer(bh); + bh->b_end_io = btrfs_end_buffer_write_sync; + bh->b_private = device; /* * we fua the first super. The others we allow @@ -3466,9 +3275,10 @@ static int write_dev_supers(struct btrfs_device *device, */ if (i == 0) { ret = btrfsic_submit_bh(REQ_OP_WRITE, - REQ_SYNC | REQ_FUA, bh); + REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh); } else { - ret = btrfsic_submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + ret = btrfsic_submit_bh(REQ_OP_WRITE, + REQ_SYNC | REQ_META | REQ_PRIO, bh); } if (ret) errors++; @@ -3477,6 +3287,50 @@ static int write_dev_supers(struct btrfs_device *device, } /* + * Wait for write completion of superblocks done by write_dev_supers, + * @max_mirrors same for write and wait phases. + * + * Return number of errors when buffer head is not found or not marked up to + * date. + */ +static int wait_dev_supers(struct btrfs_device *device, int max_mirrors) +{ + struct buffer_head *bh; + int i; + int errors = 0; + u64 bytenr; + + if (max_mirrors == 0) + max_mirrors = BTRFS_SUPER_MIRROR_MAX; + + for (i = 0; i < max_mirrors; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->commit_total_bytes) + break; + + bh = __find_get_block(device->bdev, + bytenr / BTRFS_BDEV_BLOCKSIZE, + BTRFS_SUPER_INFO_SIZE); + if (!bh) { + errors++; + continue; + } + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + errors++; + + /* drop our reference */ + brelse(bh); + + /* drop the reference from the writing run */ + brelse(bh); + } + + return errors < i ? 0 : -1; +} + +/* * endio for the write_dev_flush, this will wake anyone waiting * for the barrier when it is done */ @@ -3499,12 +3353,12 @@ static void write_dev_flush(struct btrfs_device *device) bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; - bio->bi_bdev = device->bdev; + bio_set_dev(bio, device->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; init_completion(&device->flush_wait); bio->bi_private = &device->flush_wait; - submit_bio(bio); + btrfsic_submit_bio(bio); device->flush_bio_sent = 1; } @@ -3516,7 +3370,7 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) struct bio *bio = device->flush_bio; if (!device->flush_bio_sent) - return 0; + return BLK_STS_OK; device->flush_bio_sent = 0; wait_for_completion_io(&device->flush_wait); @@ -3524,20 +3378,10 @@ static blk_status_t wait_dev_flush(struct btrfs_device *device) return bio->bi_status; } -static int check_barrier_error(struct btrfs_fs_devices *fsdevs) +static int check_barrier_error(struct btrfs_fs_info *fs_info) { - int dev_flush_error = 0; - struct btrfs_device *dev; - - list_for_each_entry_rcu(dev, &fsdevs->devices, dev_list) { - if (!dev->bdev || dev->last_flush_error) - dev_flush_error++; - } - - if (dev_flush_error > - fsdevs->fs_info->num_tolerated_disk_barrier_failures) + if (!btrfs_check_rw_degradable(fs_info)) return -EIO; - return 0; } @@ -3563,7 +3407,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) continue; write_dev_flush(dev); - dev->last_flush_error = 0; + dev->last_flush_error = BLK_STS_OK; } /* wait for all the barriers */ @@ -3592,7 +3436,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info) * to arrive at the volume status. So error checking * is being pushed to a separate loop. */ - return check_barrier_error(info->fs_devices); + return check_barrier_error(info); } return 0; } @@ -3626,60 +3470,6 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) return min_tolerated; } -int btrfs_calc_num_tolerated_disk_barrier_failures( - struct btrfs_fs_info *fs_info) -{ - struct btrfs_ioctl_space_info space; - struct btrfs_space_info *sinfo; - u64 types[] = {BTRFS_BLOCK_GROUP_DATA, - BTRFS_BLOCK_GROUP_SYSTEM, - BTRFS_BLOCK_GROUP_METADATA, - BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; - int i; - int c; - int num_tolerated_disk_barrier_failures = - (int)fs_info->fs_devices->num_devices; - - for (i = 0; i < ARRAY_SIZE(types); i++) { - struct btrfs_space_info *tmp; - - sinfo = NULL; - rcu_read_lock(); - list_for_each_entry_rcu(tmp, &fs_info->space_info, list) { - if (tmp->flags == types[i]) { - sinfo = tmp; - break; - } - } - rcu_read_unlock(); - - if (!sinfo) - continue; - - down_read(&sinfo->groups_sem); - for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { - u64 flags; - - if (list_empty(&sinfo->block_groups[c])) - continue; - - btrfs_get_block_group_info(&sinfo->block_groups[c], - &space); - if (space.total_bytes == 0 || space.used_bytes == 0) - continue; - flags = space.flags; - - num_tolerated_disk_barrier_failures = min( - num_tolerated_disk_barrier_failures, - btrfs_get_num_tolerated_disk_barrier_failures( - flags)); - } - up_read(&sinfo->groups_sem); - } - - return num_tolerated_disk_barrier_failures; -} - int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) { struct list_head *head; @@ -3693,7 +3483,14 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) u64 flags; do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); - backup_super_roots(fs_info); + + /* + * max_mirrors == 0 indicates we're from commit_transaction, + * not from fsync where the tree roots in fs_info have not + * been consistent on disk. + */ + if (max_mirrors == 0) + backup_super_roots(fs_info); sb = fs_info->super_for_commit; dev_item = &sb->dev_item; @@ -3732,12 +3529,12 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE); - memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE); + memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_FSID_SIZE); flags = btrfs_super_flags(sb); btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN); - ret = write_dev_supers(dev, sb, 0, max_mirrors); + ret = write_dev_supers(dev, sb, max_mirrors); if (ret) total_errors++; } @@ -3760,7 +3557,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors) if (!dev->in_fs_metadata || !dev->writeable) continue; - ret = write_dev_supers(dev, sb, 1, max_mirrors); + ret = wait_dev_supers(dev, max_mirrors); if (ret) total_errors++; } @@ -3928,7 +3725,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_reclaim_work); - if (!(fs_info->sb->s_flags & MS_RDONLY)) { + if (!sb_rdonly(fs_info->sb)) { /* * If the cleaner thread is stopped and there are * block groups queued for removal, the deletion will be @@ -3991,11 +3788,11 @@ void close_ctree(struct btrfs_fs_info *fs_info) cleanup_srcu_struct(&fs_info->subvol_srcu); btrfs_free_stripe_hash_table(fs_info); + btrfs_free_ref_cache(fs_info); __btrfs_free_block_rsv(root->orphan_block_rsv); root->orphan_block_rsv = NULL; - mutex_lock(&fs_info->chunk_mutex); while (!list_empty(&fs_info->pinned_chunks)) { struct extent_map *em; @@ -4004,7 +3801,6 @@ void close_ctree(struct btrfs_fs_info *fs_info) list_del_init(&em->list); free_extent_map(em); } - mutex_unlock(&fs_info->chunk_mutex); } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, @@ -4052,8 +3848,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) buf->len, fs_info->dirty_metadata_batch); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY - if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { - btrfs_print_leaf(fs_info, buf); + if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { + btrfs_print_leaf(buf); ASSERT(0); } #endif @@ -4173,7 +3969,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) ret = -EINVAL; } - if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) { btrfs_err(fs_info, "dev_item UUID does not match fsid: %pU != %pU", fs_info->fsid, sb->dev_item.fsid); @@ -4317,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, while ((node = rb_first(&delayed_refs->href_root)) != NULL) { struct btrfs_delayed_ref_head *head; - struct btrfs_delayed_ref_node *tmp; + struct rb_node *n; bool pin_bytes = false; head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); if (!mutex_trylock(&head->mutex)) { - refcount_inc(&head->node.refs); + refcount_inc(&head->refs); spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); mutex_unlock(&head->mutex); - btrfs_put_delayed_ref(&head->node); + btrfs_put_delayed_ref_head(head); spin_lock(&delayed_refs->lock); continue; } spin_lock(&head->lock); - list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list, - list) { + while ((n = rb_first(&head->ref_tree)) != NULL) { + ref = rb_entry(n, struct btrfs_delayed_ref_node, + ref_node); ref->in_tree = 0; - list_del(&ref->list); + rb_erase(&ref->ref_node, &head->ref_tree); + RB_CLEAR_NODE(&ref->ref_node); if (!list_empty(&ref->add_list)) list_del(&ref->add_list); atomic_dec(&delayed_refs->num_entries); @@ -4349,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, if (head->processing == 0) delayed_refs->num_heads_ready--; atomic_dec(&delayed_refs->num_entries); - head->node.in_tree = 0; rb_erase(&head->href_node, &delayed_refs->href_root); + RB_CLEAR_NODE(&head->href_node); spin_unlock(&head->lock); spin_unlock(&delayed_refs->lock); mutex_unlock(&head->mutex); if (pin_bytes) - btrfs_pin_extent(fs_info, head->node.bytenr, - head->node.num_bytes, 1); - btrfs_put_delayed_ref(&head->node); + btrfs_pin_extent(fs_info, head->bytenr, + head->num_bytes, 1); + btrfs_put_delayed_ref_head(head); cond_resched(); spin_lock(&delayed_refs->lock); } |