diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 460 |
1 files changed, 234 insertions, 226 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5eb7217738ed..f1ecb938ba4d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -152,7 +152,15 @@ struct list_head *btrfs_get_fs_uuids(void) return &fs_uuids; } -static struct btrfs_fs_devices *__alloc_fs_devices(void) +/* + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: if not NULL, copy the uuid to fs_devices::fsid + * + * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR(). + * The returned struct is not linked onto any lists and can be destroyed with + * kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) { struct btrfs_fs_devices *fs_devs; @@ -166,31 +174,8 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void) INIT_LIST_HEAD(&fs_devs->resized_devices); INIT_LIST_HEAD(&fs_devs->alloc_list); INIT_LIST_HEAD(&fs_devs->list); - - return fs_devs; -} - -/** - * alloc_fs_devices - allocate struct btrfs_fs_devices - * @fsid: a pointer to UUID for this FS. If NULL a new UUID is - * generated. - * - * Return: a pointer to a new &struct btrfs_fs_devices on success; - * ERR_PTR() on error. Returned struct is not linked onto any lists and - * can be destroyed with kfree() right away. - */ -static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) -{ - struct btrfs_fs_devices *fs_devs; - - fs_devs = __alloc_fs_devices(); - if (IS_ERR(fs_devs)) - return fs_devs; - if (fsid) memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); - else - generate_random_uuid(fs_devs->fsid); return fs_devs; } @@ -269,9 +254,17 @@ static struct btrfs_device *__alloc_device(void) return dev; } -static noinline struct btrfs_device *__find_device(struct list_head *head, - u64 devid, u8 *uuid) +/* + * Find a device specified by @devid or @uuid in the list of @fs_devices, or + * return NULL. + * + * If devid and uuid are both specified, the match must be exact, otherwise + * only devid is used. + */ +static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices, + u64 devid, const u8 *uuid) { + struct list_head *head = &fs_devices->devices; struct btrfs_device *dev; list_for_each_entry(dev, head, dev_list) { @@ -310,7 +303,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, if (flush) filemap_write_and_wait((*bdev)->bd_inode->i_mapping); - ret = set_blocksize(*bdev, 4096); + ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); if (ret) { blkdev_put(*bdev, flags); goto error; @@ -367,7 +360,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) int again = 0; unsigned long num_run; unsigned long batch_run = 0; - unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; int sync_pending = 0; @@ -382,8 +374,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) blk_start_plug(&plug); bdi = device->bdev->bd_bdi; - limit = btrfs_async_submit_limit(fs_info); - limit = limit * 2 / 3; loop: spin_lock(&device->io_lock); @@ -450,13 +440,6 @@ loop_lock: pending = pending->bi_next; cur->bi_next = NULL; - /* - * atomic_dec_return implies a barrier for waitqueue_active - */ - if (atomic_dec_return(&fs_info->nr_async_bios) < limit && - waitqueue_active(&fs_info->async_submit_wait)) - wake_up(&fs_info->async_submit_wait); - BUG_ON(atomic_read(&cur->__bi_cnt) == 0); /* @@ -524,12 +507,6 @@ loop_lock: &device->work); goto done; } - /* unplug every 64 requests just for good measure */ - if (batch_run % 64 == 0) { - blk_finish_plug(&plug); - blk_start_plug(&plug); - sync_pending = 0; - } } cond_resched(); @@ -554,7 +531,7 @@ static void pending_bios_fn(struct btrfs_work *work) } -void btrfs_free_stale_device(struct btrfs_device *cur_dev) +static void btrfs_free_stale_device(struct btrfs_device *cur_dev) { struct btrfs_fs_devices *fs_devs; struct btrfs_device *dev; @@ -636,8 +613,8 @@ static noinline int device_list_add(const char *path, device = NULL; } else { - device = __find_device(&fs_devices->devices, devid, - disk_super->dev_item.uuid); + device = find_device(fs_devices, devid, + disk_super->dev_item.uuid); } if (!device) { @@ -1075,14 +1052,15 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, return ret; } -void btrfs_release_disk_super(struct page *page) +static void btrfs_release_disk_super(struct page *page) { kunmap(page); put_page(page); } -int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, - struct page **page, struct btrfs_super_block **disk_super) +static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, + struct page **page, + struct btrfs_super_block **disk_super) { void *p; pgoff_t index; @@ -1578,7 +1556,6 @@ out: static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 start, u64 num_bytes) { int ret; @@ -1606,12 +1583,12 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); - btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree); - btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid); + btrfs_set_dev_extent_chunk_tree(leaf, extent, + BTRFS_CHUNK_TREE_OBJECTID); + btrfs_set_dev_extent_chunk_objectid(leaf, extent, + BTRFS_FIRST_CHUNK_TREE_OBJECTID); btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); - write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); - btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); out: @@ -1726,7 +1703,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); ptr = btrfs_device_fsid(dev_item); - write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_UUID_SIZE); + write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(leaf); ret = 0; @@ -1825,8 +1802,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info, return 0; } -struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs, - struct btrfs_device *device) +static struct btrfs_device * btrfs_find_next_active_device( + struct btrfs_fs_devices *fs_devs, struct btrfs_device *device) { struct btrfs_device *next_device; @@ -1872,7 +1849,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, struct btrfs_fs_devices *cur_devices; u64 num_devices; int ret = 0; - bool clear_super = false; mutex_lock(&uuid_mutex); @@ -1908,7 +1884,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, list_del_init(&device->dev_alloc_list); device->fs_devices->rw_devices--; mutex_unlock(&fs_info->chunk_mutex); - clear_super = true; } mutex_unlock(&uuid_mutex); @@ -1987,9 +1962,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, free_fs_devices(cur_devices); } - fs_info->num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - out: mutex_unlock(&uuid_mutex); return ret; @@ -2044,19 +2016,20 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, } btrfs_close_bdev(srcdev); - call_rcu(&srcdev->rcu, free_device); - /* - * unless fs_devices is seed fs, num_devices shouldn't go - * zero - */ - BUG_ON(!fs_devices->num_devices && !fs_devices->seeding); - /* if this is no devs we rather delete the fs_devices */ if (!fs_devices->num_devices) { struct btrfs_fs_devices *tmp_fs_devices; + /* + * On a mounted FS, num_devices can't be zero unless it's a + * seed. In case of a seed device being replaced, the replace + * target added to the sprout FS, so there will be no more + * device left under the seed FS. + */ + ASSERT(fs_devices->seeding); + tmp_fs_devices = fs_info->fs_devices; while (tmp_fs_devices) { if (tmp_fs_devices->seed == fs_devices) { @@ -2202,7 +2175,7 @@ static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info) if (!fs_devices->seeding) return -EINVAL; - seed_devices = __alloc_fs_devices(); + seed_devices = alloc_fs_devices(NULL); if (IS_ERR(seed_devices)) return PTR_ERR(seed_devices); @@ -2261,7 +2234,7 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, struct btrfs_dev_item *dev_item; struct btrfs_device *device; struct btrfs_key key; - u8 fs_uuid[BTRFS_UUID_SIZE]; + u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; u64 devid; int ret; @@ -2304,7 +2277,7 @@ next_slot: read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), - BTRFS_UUID_SIZE); + BTRFS_FSID_SIZE); device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); BUG_ON(!device); /* Logic error */ @@ -2336,8 +2309,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path u64 tmp; int seeding_dev = 0; int ret = 0; + bool unlocked = false; - if ((sb->s_flags & MS_RDONLY) && !fs_info->fs_devices->seeding) + if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) return -EROFS; bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, @@ -2407,12 +2381,15 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; - set_blocksize(device->bdev, 4096); + set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { sb->s_flags &= ~MS_RDONLY; ret = btrfs_prepare_sprout(fs_info); - BUG_ON(ret); /* -ENOMEM */ + if (ret) { + btrfs_abort_transaction(trans, ret); + goto error_trans; + } } device->fs_devices = fs_info->fs_devices; @@ -2458,14 +2435,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path mutex_unlock(&fs_info->chunk_mutex); if (ret) { btrfs_abort_transaction(trans, ret); - goto error_trans; + goto error_sysfs; } } ret = btrfs_add_device(trans, fs_info, device); if (ret) { btrfs_abort_transaction(trans, ret); - goto error_trans; + goto error_sysfs; } if (seeding_dev) { @@ -2474,7 +2451,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path ret = btrfs_finish_sprout(trans, fs_info); if (ret) { btrfs_abort_transaction(trans, ret); - goto error_trans; + goto error_sysfs; } /* Sprouting would change fsid of the mounted root, @@ -2487,13 +2464,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path "sysfs: failed to create fsid for sprout"); } - fs_info->num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); ret = btrfs_commit_transaction(trans); if (seeding_dev) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); + unlocked = true; if (ret) /* transaction commit */ return ret; @@ -2506,7 +2482,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (IS_ERR(trans)) { if (PTR_ERR(trans) == -ENOENT) return 0; - return PTR_ERR(trans); + ret = PTR_ERR(trans); + trans = NULL; + goto error_sysfs; } ret = btrfs_commit_transaction(trans); } @@ -2515,14 +2493,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path update_dev_time(device_path); return ret; +error_sysfs: + btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); error_trans: - btrfs_end_transaction(trans); + if (seeding_dev) + sb->s_flags |= MS_RDONLY; + if (trans) + btrfs_end_transaction(trans); rcu_string_free(device->name); - btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); - if (seeding_dev) { + if (seeding_dev && !unlocked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); } @@ -2612,7 +2594,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; device->dev_stats_valid = 1; - set_blocksize(device->bdev, 4096); + set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); fs_info->fs_devices->num_devices++; @@ -2702,7 +2684,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->chunk_mutex); old_total = btrfs_super_total_bytes(super_copy); - diff = new_size - device->total_bytes; + diff = round_down(new_size - device->total_bytes, fs_info->sectorsize); if (new_size <= device->total_bytes || device->is_tgtdev_for_dev_replace) { @@ -2728,8 +2710,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, } static int btrfs_free_chunk(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 chunk_objectid, - u64 chunk_offset) + struct btrfs_fs_info *fs_info, u64 chunk_offset) { struct btrfs_root *root = fs_info->chunk_root; int ret; @@ -2740,7 +2721,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - key.objectid = chunk_objectid; + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = chunk_offset; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -2763,8 +2744,7 @@ out: return ret; } -static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, - u64 chunk_objectid, u64 chunk_offset) +static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) { struct btrfs_super_block *super_copy = fs_info->super_copy; struct btrfs_disk_key *disk_key; @@ -2797,7 +2777,7 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, ret = -EIO; break; } - if (key.objectid == chunk_objectid && + if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID && key.offset == chunk_offset) { memmove(ptr, ptr + len, array_size - (cur + len)); array_size -= len; @@ -2846,7 +2826,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, struct extent_map *em; struct map_lookup *map; u64 dev_extent_len = 0; - u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; int i, ret = 0; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2902,7 +2881,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, } mutex_unlock(&fs_devices->device_list_mutex); - ret = btrfs_free_chunk(trans, fs_info, chunk_objectid, chunk_offset); + ret = btrfs_free_chunk(trans, fs_info, chunk_offset); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -2911,8 +2890,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len); if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { - ret = btrfs_del_sys_chunk(fs_info, chunk_objectid, - chunk_offset); + ret = btrfs_del_sys_chunk(fs_info, chunk_offset); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -3312,7 +3290,6 @@ static int chunk_devid_filter(struct extent_buffer *leaf, /* [pstart, pend) */ static int chunk_drange_filter(struct extent_buffer *leaf, struct btrfs_chunk *chunk, - u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_stripe *stripe; @@ -3439,7 +3416,7 @@ static int should_balance_chunk(struct btrfs_fs_info *fs_info, /* drange filter, makes sense only with devid filter */ if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) && - chunk_drange_filter(leaf, chunk, chunk_offset, bargs)) { + chunk_drange_filter(leaf, chunk, bargs)) { return 0; } @@ -3898,13 +3875,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, meta_target, data_target); } - if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - fs_info->num_tolerated_disk_barrier_failures = min( - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), - btrfs_get_num_tolerated_disk_barrier_failures( - bctl->sys.target)); - } - ret = insert_balance_item(fs_info, bctl); if (ret && ret != -EEXIST) goto out; @@ -3927,11 +3897,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, mutex_lock(&fs_info->balance_mutex); atomic_dec(&fs_info->balance_running); - if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { - fs_info->num_tolerated_disk_barrier_failures = - btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); - } - if (bargs) { memset(bargs, 0, sizeof(*bargs)); update_ioctl_balance_args(fs_info, 0, bargs); @@ -4085,7 +4050,7 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { - if (fs_info->sb->s_flags & MS_RDONLY) + if (sb_rdonly(fs_info->sb)) return -EROFS; mutex_lock(&fs_info->balance_mutex); @@ -4127,7 +4092,6 @@ static int btrfs_uuid_scan_kthread(void *data) struct btrfs_fs_info *fs_info = data; struct btrfs_root *root = fs_info->tree_root; struct btrfs_key key; - struct btrfs_key max_key; struct btrfs_path *path = NULL; int ret = 0; struct extent_buffer *eb; @@ -4146,10 +4110,6 @@ static int btrfs_uuid_scan_kthread(void *data) key.type = BTRFS_ROOT_ITEM_KEY; key.offset = 0; - max_key.objectid = (u64)-1; - max_key.type = BTRFS_ROOT_ITEM_KEY; - max_key.offset = (u64)-1; - while (1) { ret = btrfs_search_forward(root, &key, path, 0); if (ret) { @@ -4406,7 +4366,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 diff; new_size = round_down(new_size, fs_info->sectorsize); - diff = old_size - new_size; + diff = round_down(old_size - new_size, fs_info->sectorsize); if (device->is_tgtdev_for_dev_replace) return -EINVAL; @@ -4601,12 +4561,6 @@ static int btrfs_cmp_device_info(const void *a, const void *b) return 0; } -static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) -{ - /* TODO allow them to set a preferred stripe size */ - return SZ_64K; -} - static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) { if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) @@ -4629,7 +4583,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *info = trans->fs_info; struct btrfs_fs_devices *fs_devices = info->fs_devices; - struct list_head *cur; + struct btrfs_device *device; struct map_lookup *map = NULL; struct extent_map_tree *em_tree; struct extent_map *em; @@ -4649,7 +4603,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 max_chunk_size; u64 stripe_size; u64 num_bytes; - u64 raid_stripe_len = BTRFS_STRIPE_LEN; int ndevs; int i; int j; @@ -4703,22 +4656,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (!devices_info) return -ENOMEM; - cur = fs_devices->alloc_list.next; - /* * in the first pass through the devices list, we gather information * about the available holes on each device. */ ndevs = 0; - while (cur != &fs_devices->alloc_list) { - struct btrfs_device *device; + list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { u64 max_avail; u64 dev_offset; - device = list_entry(cur, struct btrfs_device, dev_alloc_list); - - cur = cur->next; - if (!device->writeable) { WARN(1, KERN_ERR "BTRFS: read-only device in alloc_list\n"); @@ -4769,15 +4715,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, btrfs_cmp_device_info, NULL); /* round down to number of usable stripes */ - ndevs -= ndevs % devs_increment; + ndevs = round_down(ndevs, devs_increment); if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) { ret = -ENOSPC; goto error; } - if (devs_max && ndevs > devs_max) - ndevs = devs_max; + ndevs = min(ndevs, devs_max); + /* * the primary goal is to maximize the number of stripes, so use as many * devices as possible, even if the stripes are not maximum sized. @@ -4791,16 +4737,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, */ data_stripes = num_stripes / ncopies; - if (type & BTRFS_BLOCK_GROUP_RAID5) { - raid_stripe_len = find_raid56_stripe_len(ndevs - 1, - info->stripesize); + if (type & BTRFS_BLOCK_GROUP_RAID5) data_stripes = num_stripes - 1; - } - if (type & BTRFS_BLOCK_GROUP_RAID6) { - raid_stripe_len = find_raid56_stripe_len(ndevs - 2, - info->stripesize); + + if (type & BTRFS_BLOCK_GROUP_RAID6) data_stripes = num_stripes - 2; - } /* * Use the number of data stripes to figure out how big this chunk @@ -4825,8 +4766,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = div_u64(stripe_size, dev_stripes); /* align to BTRFS_STRIPE_LEN */ - stripe_size = div64_u64(stripe_size, raid_stripe_len); - stripe_size *= raid_stripe_len; + stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN); map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); if (!map) { @@ -4843,10 +4783,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, j * stripe_size; } } - map->sector_size = info->sectorsize; - map->stripe_len = raid_stripe_len; - map->io_align = raid_stripe_len; - map->io_width = raid_stripe_len; + map->stripe_len = BTRFS_STRIPE_LEN; + map->io_align = BTRFS_STRIPE_LEN; + map->io_width = BTRFS_STRIPE_LEN; map->type = type; map->sub_stripes = sub_stripes; @@ -4871,19 +4810,17 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em_tree = &info->mapping_tree.map_tree; write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); - if (!ret) { - list_add_tail(&em->list, &trans->transaction->pending_chunks); - refcount_inc(&em->refs); - } - write_unlock(&em_tree->lock); if (ret) { + write_unlock(&em_tree->lock); free_extent_map(em); goto error; } - ret = btrfs_make_block_group(trans, info, 0, type, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, - start, num_bytes); + list_add_tail(&em->list, &trans->transaction->pending_chunks); + refcount_inc(&em->refs); + write_unlock(&em_tree->lock); + + ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); if (ret) goto error_del_extent; @@ -4963,11 +4900,8 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, ret = btrfs_update_device(trans, device); if (ret) break; - ret = btrfs_alloc_dev_extent(trans, device, - chunk_root->root_key.objectid, - BTRFS_FIRST_CHUNK_TREE_OBJECTID, - chunk_offset, dev_offset, - stripe_size); + ret = btrfs_alloc_dev_extent(trans, device, chunk_offset, + dev_offset, stripe_size); if (ret) break; } @@ -5172,7 +5106,6 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) } unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, - struct btrfs_mapping_tree *map_tree, u64 logical) { struct extent_map *em; @@ -5180,29 +5113,30 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, unsigned long len = fs_info->sectorsize; em = get_chunk_map(fs_info, logical, len); - WARN_ON(IS_ERR(em)); - map = em->map_lookup; - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) - len = map->stripe_len * nr_data_stripes(map); - free_extent_map(em); + if (!WARN_ON(IS_ERR(em))) { + map = em->map_lookup; + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) + len = map->stripe_len * nr_data_stripes(map); + free_extent_map(em); + } return len; } -int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, - u64 logical, u64 len, int mirror_num) +int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len) { struct extent_map *em; struct map_lookup *map; int ret = 0; em = get_chunk_map(fs_info, logical, len); - WARN_ON(IS_ERR(em)); - map = em->map_lookup; - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) - ret = 1; - free_extent_map(em); + if(!WARN_ON(IS_ERR(em))) { + map = em->map_lookup; + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) + ret = 1; + free_extent_map(em); + } return ret; } @@ -5758,10 +5692,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (map->type & BTRFS_BLOCK_GROUP_RAID0) { stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS) + if (!need_full_stripe(op)) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) + if (need_full_stripe(op)) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5774,7 +5708,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) { + if (need_full_stripe(op)) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5788,7 +5722,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); stripe_index *= map->sub_stripes; - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) + if (need_full_stripe(op)) num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; @@ -5803,9 +5737,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, } } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - if (need_raid_map && - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS || - mirror_num > 1)) { + if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { /* push stripe_nr back to the start of the full stripe */ stripe_nr = div64_u64(raid56_full_stripe_start, stripe_len * nr_data_stripes(map)); @@ -5832,9 +5764,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if ((op != BTRFS_MAP_WRITE && - op != BTRFS_MAP_GET_READ_MIRRORS) && - mirror_num <= 1) + if (!need_full_stripe(op) && mirror_num <= 1) mirror_num = 1; } } else { @@ -6096,7 +6026,7 @@ static void btrfs_end_bio(struct bio *bio) * this bio is actually up to date, we didn't * go over the max number of errors */ - bio->bi_status = 0; + bio->bi_status = BLK_STS_OK; } btrfs_end_bbio(bbio, bio); @@ -6132,13 +6062,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device, return; } - /* - * nr_async_bios allows us to reliably return congestion to the - * higher layers. Otherwise, the async bio makes it appear we have - * made progress against dirty pages when we've really just put it - * on a queue for later - */ - atomic_inc(&fs_info->nr_async_bios); WARN_ON(bio->bi_next); bio->bi_next = NULL; @@ -6188,7 +6111,7 @@ static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio, rcu_read_unlock(); } #endif - bio->bi_bdev = dev->bdev; + bio_set_dev(bio, dev->bdev); btrfs_bio_counter_inc_noblocked(fs_info); @@ -6207,13 +6130,16 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; - bio->bi_status = BLK_STS_IOERR; + if (atomic_read(&bbio->error) > bbio->max_errors) + bio->bi_status = BLK_STS_IOERR; + else + bio->bi_status = BLK_STS_OK; btrfs_end_bbio(bbio, bio); } } -int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, - int mirror_num, int async_submit) +blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, + int mirror_num, int async_submit) { struct btrfs_device *dev; struct bio *first_bio = bio; @@ -6229,11 +6155,11 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, map_length = length; btrfs_bio_counter_inc_blocked(fs_info); - ret = __btrfs_map_block(fs_info, bio_op(bio), logical, + ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, &bbio, mirror_num, 1); if (ret) { btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } total_devs = bbio->num_stripes; @@ -6256,7 +6182,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, } btrfs_bio_counter_dec(fs_info); - return ret; + return errno_to_blk_status(ret); } if (map_length < length) { @@ -6283,7 +6209,7 @@ int btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, dev_nr, async_submit); } btrfs_bio_counter_dec(fs_info); - return 0; + return BLK_STS_OK; } struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, @@ -6295,9 +6221,8 @@ struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, cur_devices = fs_info->fs_devices; while (cur_devices) { if (!fsid || - !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { - device = __find_device(&cur_devices->devices, - devid, uuid); + !memcmp(cur_devices->fsid, fsid, BTRFS_FSID_SIZE)) { + device = find_device(cur_devices, devid, uuid); if (device) return device; } @@ -6313,7 +6238,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, device = btrfs_alloc_device(NULL, &devid, dev_uuid); if (IS_ERR(device)) - return NULL; + return device; list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; @@ -6441,6 +6366,17 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, return 0; } +static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, + u64 devid, u8 *uuid, bool error) +{ + if (error) + btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing", + devid, uuid); + else + btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", + devid, uuid); +} + static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -6450,7 +6386,6 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, struct extent_map *em; u64 logical; u64 length; - u64 stripe_len; u64 devid; u8 uuid[BTRFS_UUID_SIZE]; int num_stripes; @@ -6459,7 +6394,6 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, logical = key->offset; length = btrfs_chunk_length(leaf, chunk); - stripe_len = btrfs_chunk_stripe_len(leaf, chunk); num_stripes = btrfs_chunk_num_stripes(leaf, chunk); ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical); @@ -6498,7 +6432,6 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, map->num_stripes = num_stripes; map->io_width = btrfs_chunk_io_width(leaf, chunk); map->io_align = btrfs_chunk_io_align(leaf, chunk); - map->sector_size = btrfs_chunk_sector_size(leaf, chunk); map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); map->type = btrfs_chunk_type(leaf, chunk); map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk); @@ -6514,18 +6447,21 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, if (!map->stripes[i].dev && !btrfs_test_opt(fs_info, DEGRADED)) { free_extent_map(em); - return -EIO; + btrfs_report_missing_device(fs_info, devid, uuid, true); + return -ENOENT; } if (!map->stripes[i].dev) { map->stripes[i].dev = add_missing_dev(fs_info->fs_devices, devid, uuid); - if (!map->stripes[i].dev) { + if (IS_ERR(map->stripes[i].dev)) { free_extent_map(em); - return -EIO; + btrfs_err(fs_info, + "failed to init missing dev %llu: %ld", + devid, PTR_ERR(map->stripes[i].dev)); + return PTR_ERR(map->stripes[i].dev); } - btrfs_warn(fs_info, "devid %llu uuid %pU is missing", - devid, uuid); + btrfs_report_missing_device(fs_info, devid, uuid, false); } map->stripes[i].dev->in_fs_metadata = 1; } @@ -6569,10 +6505,11 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, int ret; BUG_ON(!mutex_is_locked(&uuid_mutex)); + ASSERT(fsid); fs_devices = fs_info->fs_devices->seed; while (fs_devices) { - if (!memcmp(fs_devices->fsid, fsid, BTRFS_UUID_SIZE)) + if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE)) return fs_devices; fs_devices = fs_devices->seed; @@ -6625,16 +6562,16 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, struct btrfs_device *device; u64 devid; int ret; - u8 fs_uuid[BTRFS_UUID_SIZE]; + u8 fs_uuid[BTRFS_FSID_SIZE]; u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), - BTRFS_UUID_SIZE); + BTRFS_FSID_SIZE); - if (memcmp(fs_uuid, fs_info->fsid, BTRFS_UUID_SIZE)) { + if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) { fs_devices = open_seed_devices(fs_info, fs_uuid); if (IS_ERR(fs_devices)) return PTR_ERR(fs_devices); @@ -6642,17 +6579,30 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); if (!device) { - if (!btrfs_test_opt(fs_info, DEGRADED)) - return -EIO; + if (!btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_report_missing_device(fs_info, devid, + dev_uuid, true); + return -ENOENT; + } device = add_missing_dev(fs_devices, devid, dev_uuid); - if (!device) - return -ENOMEM; - btrfs_warn(fs_info, "devid %llu uuid %pU missing", - devid, dev_uuid); + if (IS_ERR(device)) { + btrfs_err(fs_info, + "failed to add missing dev %llu: %ld", + devid, PTR_ERR(device)); + return PTR_ERR(device); + } + btrfs_report_missing_device(fs_info, devid, dev_uuid, false); } else { - if (!device->bdev && !btrfs_test_opt(fs_info, DEGRADED)) - return -EIO; + if (!device->bdev) { + if (!btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_report_missing_device(fs_info, + devid, dev_uuid, true); + return -ENOENT; + } + btrfs_report_missing_device(fs_info, devid, + dev_uuid, false); + } if(!device->bdev && !device->missing) { /* @@ -6818,6 +6768,64 @@ out_short_read: return -EIO; } +/* + * Check if all chunks in the fs are OK for read-write degraded mount + * + * Return true if all chunks meet the minimal RW mount requirements. + * Return false if any chunk doesn't meet the minimal RW mount requirements. + */ +bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info) +{ + struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map *em; + u64 next_start = 0; + bool ret = true; + + read_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); + read_unlock(&map_tree->map_tree.lock); + /* No chunk at all? Return false anyway */ + if (!em) { + ret = false; + goto out; + } + while (em) { + struct map_lookup *map; + int missing = 0; + int max_tolerated; + int i; + + map = em->map_lookup; + max_tolerated = + btrfs_get_num_tolerated_disk_barrier_failures( + map->type); + for (i = 0; i < map->num_stripes; i++) { + struct btrfs_device *dev = map->stripes[i].dev; + + if (!dev || !dev->bdev || dev->missing || + dev->last_flush_error) + missing++; + } + if (missing > max_tolerated) { + btrfs_warn(fs_info, + "chunk %llu missing %d devices, max tolerance is %d for writeable mount", + em->start, missing, max_tolerated); + free_extent_map(em); + ret = false; + goto out; + } + next_start = extent_map_end(em); + free_extent_map(em); + + read_lock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(&map_tree->map_tree, next_start, + (u64)(-1) - next_start); + read_unlock(&map_tree->map_tree.lock); + } +out: + return ret; +} + int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info) { struct btrfs_root *root = fs_info->chunk_root; |