summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2017-12-01 10:05:14 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2017-12-01 10:05:14 +1100
commitd795ad0b06209ff30fb79e34c75363a025d3f617 (patch)
treecdf4e01852598b9bf2c6476ab848880479e674e4 /fs
parentf41693dc821565ee610a9346399494e7556a02cf (diff)
parent930a0ad9223a2a944023ac8cd15f0e866a7f75e6 (diff)
Merge remote-tracking branch 'btrfs-kdave/for-next'
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/compression.c21
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c18
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/delayed-inode.c59
-rw-r--r--fs/btrfs/delayed-ref.c2
-rw-r--r--fs/btrfs/delayed-ref.h2
-rw-r--r--fs/btrfs/dir-item.c108
-rw-r--r--fs/btrfs/disk-io.c38
-rw-r--r--fs/btrfs/disk-io.h3
-rw-r--r--fs/btrfs/export.c5
-rw-r--r--fs/btrfs/extent-tree.c10
-rw-r--r--fs/btrfs/extent_io.c85
-rw-r--r--fs/btrfs/extent_io.h36
-rw-r--r--fs/btrfs/extent_map.h1
-rw-r--r--fs/btrfs/file.c361
-rw-r--r--fs/btrfs/free-space-cache.c8
-rw-r--r--fs/btrfs/inode.c93
-rw-r--r--fs/btrfs/ioctl.c41
-rw-r--r--fs/btrfs/props.c9
-rw-r--r--fs/btrfs/qgroup.c172
-rw-r--r--fs/btrfs/qgroup.h28
-rw-r--r--fs/btrfs/ref-verify.c6
-rw-r--r--fs/btrfs/root-tree.c7
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/super.c266
-rw-r--r--fs/btrfs/sysfs.c2
-rw-r--r--fs/btrfs/tests/inode-tests.c17
-rw-r--r--fs/btrfs/transaction.c15
-rw-r--r--fs/btrfs/transaction.h11
-rw-r--r--fs/btrfs/tree-checker.c141
-rw-r--r--fs/btrfs/tree-log.c54
-rw-r--r--fs/btrfs/volumes.c310
-rw-r--r--fs/btrfs/volumes.h26
-rw-r--r--fs/btrfs/xattr.c6
-rw-r--r--fs/btrfs/zstd.c151
36 files changed, 1258 insertions, 873 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5982c8a71f02..06ef50712acd 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1522,12 +1522,23 @@ out:
unsigned int btrfs_compress_str2level(const char *str)
{
- if (strncmp(str, "zlib", 4) != 0)
+ long level;
+ int max;
+
+ if (strncmp(str, "zlib", 4) == 0)
+ max = 9;
+ else if (strncmp(str, "zstd", 4) == 0)
+ max = 15; // encoded on 4 bits, real max is 22
+ else
return 0;
- /* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */
- if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
- return str[5] - '0';
+ /* Accepted form: zxxx:1 up to zxxx:9 and nothing left after the number */
+ str += 4;
+ if (*str == ':')
+ str++;
+
+ if (kstrtoul(str, 10, &level))
+ return BTRFS_ZLIB_DEFAULT_LEVEL;
- return BTRFS_ZLIB_DEFAULT_LEVEL;
+ return (level > max) ? BTRFS_ZLIB_DEFAULT_LEVEL : level;
}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 0868cc554f14..6b692903a23c 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -75,7 +75,7 @@ struct compressed_bio {
u32 sums;
};
-void btrfs_init_compress(void);
+void __init btrfs_init_compress(void);
void btrfs_exit_compress(void);
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 531e0a8645b0..1e74cf826532 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1032,14 +1032,17 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
ret = btrfs_inc_ref(trans, root, cow, 1);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
}
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else {
@@ -1049,7 +1052,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
}
if (new_flags != 0) {
int level = btrfs_header_level(buf);
@@ -1068,9 +1072,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
ret = btrfs_dec_ref(trans, root, buf, 1);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
}
clean_tree_block(fs_info, buf);
*last_ref = 1;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 13c260b525a1..09b72b6996ce 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3060,15 +3060,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_path *path, u64 dir,
const char *name, u16 name_len,
int mod);
-int verify_dir_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf, int slot,
- struct btrfs_dir_item *dir_item);
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
struct btrfs_path *path,
const char *name,
int name_len);
-bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
- unsigned long start, u16 name_len);
/* orphan.c */
int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -3197,7 +3192,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
-int btrfs_init_cachep(void);
+int __init btrfs_init_cachep(void);
void btrfs_destroy_cachep(void);
long btrfs_ioctl_trans_end(struct file *file);
struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
@@ -3248,7 +3243,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
struct file *dst_file, u64 dst_loff);
/* file.c */
-int btrfs_auto_defrag_init(void);
+int __init btrfs_auto_defrag_init(void);
void btrfs_auto_defrag_exit(void);
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode);
@@ -3283,7 +3278,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
/* sysfs.c */
-int btrfs_init_sysfs(void);
+int __init btrfs_init_sysfs(void);
void btrfs_exit_sysfs(void);
int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info);
void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8b..f8e59ac4967e 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1279,40 +1279,42 @@ static void btrfs_async_run_delayed_root(struct btrfs_work *work)
if (!path)
goto out;
-again:
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
- goto free_path;
+ do {
+ if (atomic_read(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND / 2)
+ break;
- delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
- if (!delayed_node)
- goto free_path;
+ delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+ if (!delayed_node)
+ break;
- path->leave_spinning = 1;
- root = delayed_node->root;
+ path->leave_spinning = 1;
+ root = delayed_node->root;
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
- goto release_path;
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ btrfs_release_path(path);
+ btrfs_release_prepared_delayed_node(delayed_node);
+ total_done++;
+ continue;
+ }
- block_rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->delayed_block_rsv;
+ block_rsv = trans->block_rsv;
+ trans->block_rsv = &root->fs_info->delayed_block_rsv;
- __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
+ __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
- trans->block_rsv = block_rsv;
- btrfs_end_transaction(trans);
- btrfs_btree_balance_dirty_nodelay(root->fs_info);
+ trans->block_rsv = block_rsv;
+ btrfs_end_transaction(trans);
+ btrfs_btree_balance_dirty_nodelay(root->fs_info);
-release_path:
- btrfs_release_path(path);
- total_done++;
+ btrfs_release_path(path);
+ btrfs_release_prepared_delayed_node(delayed_node);
+ total_done++;
- btrfs_release_prepared_delayed_node(delayed_node);
- if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
- total_done < async_work->nr)
- goto again;
+ } while ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK)
+ || total_done < async_work->nr);
-free_path:
btrfs_free_path(path);
out:
wake_up(&delayed_root->wait);
@@ -1325,10 +1327,6 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{
struct btrfs_async_delayed_work *async_work;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
- btrfs_workqueue_normal_congested(fs_info->delayed_workers))
- return 0;
-
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
if (!async_work)
return -ENOMEM;
@@ -1364,7 +1362,8 @@ void btrfs_balance_delayed_items(struct btrfs_fs_info *fs_info)
{
struct btrfs_delayed_root *delayed_root = fs_info->delayed_root;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+ if ((atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND) ||
+ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return;
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 83be8f9fd906..a1a40cf382e3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -937,7 +937,7 @@ void btrfs_delayed_ref_exit(void)
kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
-int btrfs_delayed_ref_init(void)
+int __init btrfs_delayed_ref_init(void)
{
btrfs_delayed_ref_head_cachep = kmem_cache_create(
"btrfs_delayed_ref_head",
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a43af432f859..c4f625e5a691 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -203,7 +203,7 @@ extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
-int btrfs_delayed_ref_init(void);
+int __init btrfs_delayed_ref_init(void);
void btrfs_delayed_ref_exit(void);
static inline struct btrfs_delayed_extent_op *
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 41cb9196eaa8..cbe421605cd5 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -403,8 +403,6 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
btrfs_dir_data_len(leaf, dir_item);
name_ptr = (unsigned long)(dir_item + 1);
- if (verify_dir_item(fs_info, leaf, path->slots[0], dir_item))
- return NULL;
if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
return dir_item;
@@ -450,109 +448,3 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
}
return ret;
}
-
-int verify_dir_item(struct btrfs_fs_info *fs_info,
- struct extent_buffer *leaf,
- int slot,
- struct btrfs_dir_item *dir_item)
-{
- u16 namelen = BTRFS_NAME_LEN;
- int ret;
- u8 type = btrfs_dir_type(leaf, dir_item);
-
- if (type >= BTRFS_FT_MAX) {
- btrfs_crit(fs_info, "invalid dir item type: %d", (int)type);
- return 1;
- }
-
- if (type == BTRFS_FT_XATTR)
- namelen = XATTR_NAME_MAX;
-
- if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
- btrfs_crit(fs_info, "invalid dir item name len: %u",
- (unsigned)btrfs_dir_name_len(leaf, dir_item));
- return 1;
- }
-
- namelen = btrfs_dir_name_len(leaf, dir_item);
- ret = btrfs_is_name_len_valid(leaf, slot,
- (unsigned long)(dir_item + 1), namelen);
- if (!ret)
- return 1;
-
- /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
- if ((btrfs_dir_data_len(leaf, dir_item) +
- btrfs_dir_name_len(leaf, dir_item)) >
- BTRFS_MAX_XATTR_SIZE(fs_info)) {
- btrfs_crit(fs_info, "invalid dir item name + data len: %u + %u",
- (unsigned)btrfs_dir_name_len(leaf, dir_item),
- (unsigned)btrfs_dir_data_len(leaf, dir_item));
- return 1;
- }
-
- return 0;
-}
-
-bool btrfs_is_name_len_valid(struct extent_buffer *leaf, int slot,
- unsigned long start, u16 name_len)
-{
- struct btrfs_fs_info *fs_info = leaf->fs_info;
- struct btrfs_key key;
- u32 read_start;
- u32 read_end;
- u32 item_start;
- u32 item_end;
- u32 size;
- bool ret = true;
-
- ASSERT(start > BTRFS_LEAF_DATA_OFFSET);
-
- read_start = start - BTRFS_LEAF_DATA_OFFSET;
- read_end = read_start + name_len;
- item_start = btrfs_item_offset_nr(leaf, slot);
- item_end = btrfs_item_end_nr(leaf, slot);
-
- btrfs_item_key_to_cpu(leaf, &key, slot);
-
- switch (key.type) {
- case BTRFS_DIR_ITEM_KEY:
- case BTRFS_XATTR_ITEM_KEY:
- case BTRFS_DIR_INDEX_KEY:
- size = sizeof(struct btrfs_dir_item);
- break;
- case BTRFS_INODE_REF_KEY:
- size = sizeof(struct btrfs_inode_ref);
- break;
- case BTRFS_INODE_EXTREF_KEY:
- size = sizeof(struct btrfs_inode_extref);
- break;
- case BTRFS_ROOT_REF_KEY:
- case BTRFS_ROOT_BACKREF_KEY:
- size = sizeof(struct btrfs_root_ref);
- break;
- default:
- ret = false;
- goto out;
- }
-
- if (read_start < item_start) {
- ret = false;
- goto out;
- }
- if (read_end > item_end) {
- ret = false;
- goto out;
- }
-
- /* there shall be item(s) before name */
- if (read_start - item_start < size) {
- ret = false;
- goto out;
- }
-
-out:
- if (!ret)
- btrfs_crit(fs_info, "invalid dir item name len: %u",
- (unsigned int)name_len);
- return ret;
-}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 10a2a579cc7f..82c96607fc46 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -220,7 +220,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
* extents on the btree inode are pretty simple, there's one extent
* that covers the entire device
*/
-static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
+struct extent_map *btree_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset, u64 start, u64 len,
int create)
{
@@ -285,7 +285,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
int verify)
{
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
- char *result = NULL;
+ char result[BTRFS_CSUM_SIZE];
unsigned long len;
unsigned long cur_len;
unsigned long offset = BTRFS_CSUM_SIZE;
@@ -294,7 +294,6 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
unsigned long map_len;
int err;
u32 crc = ~(u32)0;
- unsigned long inline_result;
len = buf->len - offset;
while (len > 0) {
@@ -308,13 +307,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
len -= cur_len;
offset += cur_len;
}
- if (csum_size > sizeof(inline_result)) {
- result = kzalloc(csum_size, GFP_NOFS);
- if (!result)
- return -ENOMEM;
- } else {
- result = (char *)&inline_result;
- }
+ memset(result, 0, BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
@@ -329,15 +322,12 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
"%s checksum verify failed on %llu wanted %X found %X level %d",
fs_info->sb->s_id, buf->start,
val, found, btrfs_header_level(buf));
- if (result != (char *)&inline_result)
- kfree(result);
return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
}
- if (result != (char *)&inline_result)
- kfree(result);
+
return 0;
}
@@ -455,7 +445,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_fs_info *fs_info,
io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
while (1) {
ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
- btree_get_extent, mirror_num);
+ mirror_num);
if (!ret) {
if (!verify_parent_transid(io_tree, eb,
parent_transid, 0))
@@ -1012,7 +1002,7 @@ void readahead_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr)
if (IS_ERR(buf))
return;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
- buf, WAIT_NONE, btree_get_extent, 0);
+ buf, WAIT_NONE, 0);
free_extent_buffer(buf);
}
@@ -1031,7 +1021,7 @@ int reada_tree_block_flagged(struct btrfs_fs_info *fs_info, u64 bytenr,
set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
- btree_get_extent, mirror_num);
+ mirror_num);
if (ret) {
free_extent_buffer(buf);
return ret;
@@ -1243,7 +1233,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
- uuid_le uuid;
+ uuid_le uuid = { 0 };
root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
@@ -1284,7 +1274,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
- uuid_le_gen(&uuid);
+ if (is_fstree(objectid))
+ uuid_le_gen(&uuid);
memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0;
@@ -3396,9 +3387,10 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
int errors_wait = 0;
blk_status_t ret;
+ lockdep_assert_held(&info->fs_devices->device_list_mutex);
/* send down all the barriers */
head = &info->fs_devices->devices;
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (dev->missing)
continue;
if (!dev->bdev)
@@ -3411,7 +3403,7 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
}
/* wait for all the barriers */
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (dev->missing)
continue;
if (!dev->bdev) {
@@ -3510,7 +3502,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
}
}
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev) {
total_errors++;
continue;
@@ -3551,7 +3543,7 @@ int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors)
}
total_errors = 0;
- list_for_each_entry_rcu(dev, head, dev_list) {
+ list_for_each_entry(dev, head, dev_list) {
if (!dev->bdev)
continue;
if (!dev->in_fs_metadata || !dev->writeable)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 7f7c35d6347a..301151a50ac1 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -149,6 +149,9 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
+struct extent_map *btree_get_extent(struct btrfs_inode *inode,
+ struct page *page, size_t pg_offset, u64 start, u64 len,
+ int create);
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
int __init btrfs_end_io_wq_init(void);
void btrfs_end_io_wq_exit(void);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 3aeb5770f896..ddaccad469f8 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -283,11 +283,6 @@ static int btrfs_get_name(struct dentry *parent, char *name,
name_len = btrfs_inode_ref_name_len(leaf, iref);
}
- ret = btrfs_is_name_len_valid(leaf, path->slots[0], name_ptr, name_len);
- if (!ret) {
- btrfs_free_path(path);
- return -EIO;
- }
read_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_free_path(path);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4497f937e8fb..fa78da608ff2 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2894,7 +2894,7 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *global_rsv;
u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
- u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
+ unsigned int num_dirty_bgs = trans->transaction->num_dirty_bgs;
u64 num_bytes, num_dirty_bgs_bytes;
int ret = 0;
@@ -4945,12 +4945,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info,
bytes = 0;
else
bytes -= delayed_rsv->size;
+ spin_unlock(&delayed_rsv->lock);
+
if (percpu_counter_compare(&space_info->total_bytes_pinned,
bytes) < 0) {
- spin_unlock(&delayed_rsv->lock);
return -ENOSPC;
}
- spin_unlock(&delayed_rsv->lock);
commit:
trans = btrfs_join_transaction(fs_info->extent_root);
@@ -5738,8 +5738,8 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
* or return if we already have enough space. This will also handle the resreve
* tracepoint for the reserved amount.
*/
-int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
- enum btrfs_reserve_flush_enum flush)
+static int btrfs_inode_rsv_refill(struct btrfs_inode *inode,
+ enum btrfs_reserve_flush_enum flush)
{
struct btrfs_root *root = inode->root;
struct btrfs_block_rsv *block_rsv = &inode->block_rsv;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 012d63870b99..ef72efef8b39 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -21,6 +21,7 @@
#include "locking.h"
#include "rcu-string.h"
#include "backref.h"
+#include "disk-io.h"
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
@@ -109,8 +110,6 @@ struct tree_entry {
struct extent_page_data {
struct bio *bio;
struct extent_io_tree *tree;
- get_extent_t *get_extent;
-
/* tells writepage not to lock the state bits for this range
* it still does the unlocking
*/
@@ -581,7 +580,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
*
* This takes the tree lock, and returns 0 on success and < 0 on error.
*/
-static int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
struct extent_state **cached_state,
gfp_t mask, struct extent_changeset *changeset)
@@ -1295,10 +1294,10 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
- struct extent_state **cached, gfp_t mask)
+ struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, bits, wake, delete,
- cached, mask, NULL);
+ cached, GFP_NOFS, NULL);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -1348,7 +1347,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
- EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS);
+ EXTENT_LOCKED, 1, 0, NULL);
return 0;
}
return 1;
@@ -1744,7 +1743,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
unsigned long page_ops)
{
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, clear_bits, 1, 0,
- NULL, GFP_NOFS);
+ NULL);
__process_pages_contig(inode->i_mapping, locked_page,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
@@ -3092,9 +3091,8 @@ out:
static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
struct page *pages[], int nr_pages,
u64 start, u64 end,
- get_extent_t *get_extent,
struct extent_map **em_cached,
- struct bio **bio, int mirror_num,
+ struct bio **bio,
unsigned long *bio_flags,
u64 *prev_em_start)
{
@@ -3115,18 +3113,17 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
}
for (index = 0; index < nr_pages; index++) {
- __do_readpage(tree, pages[index], get_extent, em_cached, bio,
- mirror_num, bio_flags, 0, prev_em_start);
+ __do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
+ bio, 0, bio_flags, 0, prev_em_start);
put_page(pages[index]);
}
}
static void __extent_readpages(struct extent_io_tree *tree,
struct page *pages[],
- int nr_pages, get_extent_t *get_extent,
+ int nr_pages,
struct extent_map **em_cached,
- struct bio **bio, int mirror_num,
- unsigned long *bio_flags,
+ struct bio **bio, unsigned long *bio_flags,
u64 *prev_em_start)
{
u64 start = 0;
@@ -3146,8 +3143,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
} else {
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
- end, get_extent, em_cached,
- bio, mirror_num, bio_flags,
+ end, em_cached,
+ bio, bio_flags,
prev_em_start);
start = page_start;
end = start + PAGE_SIZE - 1;
@@ -3158,9 +3155,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
if (end)
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
- end, get_extent, em_cached, bio,
- mirror_num, bio_flags,
- prev_em_start);
+ end, em_cached, bio,
+ bio_flags, prev_em_start);
}
static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -3375,7 +3371,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
page_end, NULL, 1);
break;
}
- em = epd->get_extent(BTRFS_I(inode), page, pg_offset, cur,
+ em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
end - cur + 1, 1);
if (IS_ERR_OR_NULL(em)) {
SetPageError(page);
@@ -4060,14 +4056,12 @@ static noinline void flush_write_bio(void *data)
}
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent,
struct writeback_control *wbc)
{
int ret;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
- .get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
@@ -4079,8 +4073,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
}
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
- u64 start, u64 end, get_extent_t *get_extent,
- int mode)
+ u64 start, u64 end, int mode)
{
int ret = 0;
struct address_space *mapping = inode->i_mapping;
@@ -4091,7 +4084,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
- .get_extent = get_extent,
.extent_locked = 1,
.sync_io = mode == WB_SYNC_ALL,
};
@@ -4123,14 +4115,12 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
- get_extent_t *get_extent,
struct writeback_control *wbc)
{
int ret = 0;
struct extent_page_data epd = {
.bio = NULL,
.tree = tree,
- .get_extent = get_extent,
.extent_locked = 0,
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
};
@@ -4143,8 +4133,7 @@ int extent_writepages(struct extent_io_tree *tree,
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent)
+ struct list_head *pages, unsigned nr_pages)
{
struct bio *bio = NULL;
unsigned page_idx;
@@ -4170,13 +4159,13 @@ int extent_readpages(struct extent_io_tree *tree,
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
- __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, &prev_em_start);
+ __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
+ &bio_flags, &prev_em_start);
nr = 0;
}
if (nr)
- __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, &prev_em_start);
+ __extent_readpages(tree, pagepool, nr, &em_cached, &bio,
+ &bio_flags, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
@@ -4209,7 +4198,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
clear_extent_bit(tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING,
- 1, 1, &cached_state, GFP_NOFS);
+ 1, 1, &cached_state);
return 0;
}
@@ -4234,9 +4223,9 @@ static int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
*/
- ret = clear_extent_bit(tree, start, end,
+ ret = __clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM),
- 0, 0, NULL, mask);
+ 0, 0, NULL, mask, NULL);
/* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue.
@@ -4302,9 +4291,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
* This maps until we find something past 'last'
*/
static struct extent_map *get_extent_skip_holes(struct inode *inode,
- u64 offset,
- u64 last,
- get_extent_t *get_extent)
+ u64 offset, u64 last)
{
u64 sectorsize = btrfs_inode_sectorsize(inode);
struct extent_map *em;
@@ -4318,15 +4305,14 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
if (len == 0)
break;
len = ALIGN(len, sectorsize);
- em = get_extent(BTRFS_I(inode), NULL, 0, offset, len, 0);
+ em = btrfs_get_extent_fiemap(BTRFS_I(inode), NULL, 0, offset,
+ len, 0);
if (IS_ERR_OR_NULL(em))
return em;
/* if this isn't a hole return it */
- if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
- em->block_start != EXTENT_MAP_HOLE) {
+ if (em->block_start != EXTENT_MAP_HOLE)
return em;
- }
/* this is a hole, advance to the next extent */
offset = extent_map_end(em);
@@ -4451,7 +4437,7 @@ static int emit_last_fiemap_cache(struct btrfs_fs_info *fs_info,
}
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len, get_extent_t *get_extent)
+ __u64 start, __u64 len)
{
int ret = 0;
u64 off = start;
@@ -4533,8 +4519,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
- em = get_extent_skip_holes(inode, start, last_for_get_extent,
- get_extent);
+ em = get_extent_skip_holes(inode, start, last_for_get_extent);
if (!em)
goto out;
if (IS_ERR(em)) {
@@ -4622,8 +4607,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
/* now scan forward to see if this is really the last extent. */
- em = get_extent_skip_holes(inode, off, last_for_get_extent,
- get_extent);
+ em = get_extent_skip_holes(inode, off, last_for_get_extent);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
@@ -5263,8 +5247,7 @@ int extent_buffer_uptodate(struct extent_buffer *eb)
}
int read_extent_buffer_pages(struct extent_io_tree *tree,
- struct extent_buffer *eb, int wait,
- get_extent_t *get_extent, int mirror_num)
+ struct extent_buffer *eb, int wait, int mirror_num)
{
unsigned long i;
struct page *page;
@@ -5324,7 +5307,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ClearPageError(page);
err = __extent_read_full_page(tree, page,
- get_extent, &bio,
+ btree_get_extent, &bio,
mirror_num, &bio_flags,
REQ_META);
if (err) {
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 93dcae0c3183..c28f5ef88f42 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -300,19 +300,22 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
unsigned bits, int wake, int delete,
- struct extent_state **cached, gfp_t mask);
+ struct extent_state **cached);
+int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned bits, int wake, int delete,
+ struct extent_state **cached, gfp_t mask,
+ struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
- GFP_NOFS);
+ return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
}
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached, gfp_t mask)
{
- return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
- mask);
+ return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
+ mask, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
@@ -323,8 +326,7 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
if (bits & EXTENT_LOCKED)
wake = 1;
- return clear_extent_bit(tree, start, end, bits, wake, 0, NULL,
- GFP_NOFS);
+ return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -340,10 +342,10 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
- u64 end, struct extent_state **cached_state, gfp_t mask)
+ u64 end, struct extent_state **cached_state)
{
- return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
- cached_state, mask);
+ return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
+ cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
@@ -358,7 +360,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+ EXTENT_DO_ACCOUNTING, 0, 0, NULL);
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
@@ -402,23 +404,19 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
- get_extent_t *get_extent,
struct writeback_control *wbc);
int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
- u64 start, u64 end, get_extent_t *get_extent,
- int mode);
+ u64 start, u64 end, int mode);
int extent_writepages(struct extent_io_tree *tree,
struct address_space *mapping,
- get_extent_t *get_extent,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
int extent_readpages(struct extent_io_tree *tree,
struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages,
- get_extent_t get_extent);
+ struct list_head *pages, unsigned nr_pages);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
- __u64 start, __u64 len, get_extent_t *get_extent);
+ __u64 start, __u64 len);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
@@ -437,7 +435,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_PAGE_LOCK 2
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, int wait,
- get_extent_t *get_extent, int mirror_num);
+ int mirror_num);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len)
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 64365bbc9b16..e9e285d45c7e 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -13,7 +13,6 @@
/* bits for the flags field */
#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
#define EXTENT_FLAG_COMPRESSED 1
-#define EXTENT_FLAG_VACANCY 2 /* no file extent item found */
#define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */
#define EXTENT_FLAG_LOGGING 4 /* Logging this extent */
#define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eb1bac7c8553..1589961f7bba 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1519,7 +1519,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
clear_extent_bit(&inode->io_tree, start_pos, last_pos,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, cached_state, GFP_NOFS);
+ 0, 0, cached_state);
*lockstart = start_pos;
*lockend = last_pos;
ret = 1;
@@ -1755,7 +1755,7 @@ again:
if (copied > 0)
ret = btrfs_dirty_pages(inode, pages, dirty_pages,
- pos, copied, NULL);
+ pos, copied, &cached_state);
if (extents_locked)
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
lockstart, lockend, &cached_state,
@@ -2019,10 +2019,19 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
{
int ret;
+ struct blk_plug plug;
+ /*
+ * This is only called in fsync, which would do synchronous writes, so
+ * a plug can merge adjacent IOs as much as possible. Esp. in case of
+ * multiple disks using raid profile, a large IO can be split to
+ * several segments of stripe length (currently 64K).
+ */
+ blk_start_plug(&plug);
atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = btrfs_fdatawrite_range(inode, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
+ blk_finish_plug(&plug);
return ret;
}
@@ -2450,6 +2459,46 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
return ret;
}
+static int btrfs_punch_hole_lock_range(struct inode *inode,
+ const u64 lockstart,
+ const u64 lockend,
+ struct extent_state **cached_state)
+{
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ int ret;
+
+ truncate_pagecache_range(inode, lockstart, lockend);
+
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+ cached_state);
+ ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+ /*
+ * We need to make sure we have no ordered extents in this range
+ * and nobody raced in and read a page in this range, if we did
+ * we need to try again.
+ */
+ if ((!ordered ||
+ (ordered->file_offset + ordered->len <= lockstart ||
+ ordered->file_offset > lockend)) &&
+ !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ break;
+ }
+ if (ordered)
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, cached_state, GFP_NOFS);
+ ret = btrfs_wait_ordered_range(inode, lockstart,
+ lockend - lockstart + 1);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2566,38 +2615,11 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
goto out_only_mutex;
}
- while (1) {
- struct btrfs_ordered_extent *ordered;
-
- truncate_pagecache_range(inode, lockstart, lockend);
-
- lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
-
- /*
- * We need to make sure we have no ordered extents in this range
- * and nobody raced in and read a page in this range, if we did
- * we need to try again.
- */
- if ((!ordered ||
- (ordered->file_offset + ordered->len <= lockstart ||
- ordered->file_offset > lockend)) &&
- !btrfs_page_exists_in_range(inode, lockstart, lockend)) {
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- break;
- }
- if (ordered)
- btrfs_put_ordered_extent(ordered);
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, &cached_state, GFP_NOFS);
- ret = btrfs_wait_ordered_range(inode, lockstart,
- lockend - lockstart + 1);
- if (ret) {
- inode_unlock(inode);
- return ret;
- }
+ ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
+ &cached_state);
+ if (ret) {
+ inode_unlock(inode);
+ goto out_only_mutex;
}
path = btrfs_alloc_path();
@@ -2806,6 +2828,217 @@ insert:
return 0;
}
+static int btrfs_fallocate_update_isize(struct inode *inode,
+ const u64 end,
+ const int mode)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+ int ret2;
+
+ if (mode & FALLOC_FL_KEEP_SIZE || end <= i_size_read(inode))
+ return 0;
+
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ inode->i_ctime = current_time(inode);
+ i_size_write(inode, end);
+ btrfs_ordered_update_i_size(inode, end, NULL);
+ ret = btrfs_update_inode(trans, root, inode);
+ ret2 = btrfs_end_transaction(trans);
+
+ return ret ? ret : ret2;
+}
+
+static int btrfs_zero_range_check_range_boundary(struct inode *inode,
+ u64 offset)
+{
+ const u64 sectorsize = btrfs_inode_sectorsize(inode);
+ struct extent_map *em;
+ int ret = 0;
+
+ offset = round_down(offset, sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ if (em->block_start == EXTENT_MAP_HOLE)
+ ret = 1;
+
+ free_extent_map(em);
+ return ret;
+}
+
+static int btrfs_zero_range(struct inode *inode,
+ loff_t offset,
+ loff_t len,
+ const int mode)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct extent_map *em;
+ struct extent_changeset *data_reserved = NULL;
+ int ret;
+ u64 alloc_hint = 0;
+ const u64 sectorsize = btrfs_inode_sectorsize(inode);
+ u64 alloc_start = round_down(offset, sectorsize);
+ u64 alloc_end = round_up(offset + len, sectorsize);
+ u64 bytes_to_reserve = 0;
+ bool space_reserved = false;
+
+ inode_dio_wait(inode);
+
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
+ alloc_start, alloc_end - alloc_start, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ /*
+ * Avoid hole punching and extent allocation for some cases. More cases
+ * could be considered, but these are unlikely common and we keep things
+ * as simple as possible for now. Also, intentionally, if the target
+ * range contains one or more prealloc extents together with regular
+ * extents and holes, we drop all the existing extents and allocate a
+ * new prealloc extent, so that we get a larger contiguous disk extent.
+ */
+ if (em->start <= alloc_start &&
+ test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ const u64 em_end = em->start + em->len;
+
+ if (em_end >= offset + len) {
+ /*
+ * The whole range is already a prealloc extent,
+ * do nothing except updating the inode's i_size if
+ * needed.
+ */
+ free_extent_map(em);
+ ret = btrfs_fallocate_update_isize(inode, offset + len,
+ mode);
+ goto out;
+ }
+ /*
+ * Part of the range is already a prealloc extent, so operate
+ * only on the remaining part of the range.
+ */
+ alloc_start = em_end;
+ ASSERT(IS_ALIGNED(alloc_start, sectorsize));
+ len = offset + len - alloc_start;
+ offset = alloc_start;
+ alloc_hint = em->block_start + em->len;
+ }
+ free_extent_map(em);
+
+ if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
+ BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
+ alloc_start, sectorsize, 0);
+ if (IS_ERR(em)) {
+ ret = PTR_ERR(em);
+ goto out;
+ }
+
+ if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ free_extent_map(em);
+ ret = btrfs_fallocate_update_isize(inode, offset + len,
+ mode);
+ goto out;
+ }
+ if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
+ free_extent_map(em);
+ ret = btrfs_truncate_block(inode, offset, len, 0);
+ if (!ret)
+ ret = btrfs_fallocate_update_isize(inode,
+ offset + len,
+ mode);
+ return ret;
+ }
+ free_extent_map(em);
+ alloc_start = round_down(offset, sectorsize);
+ alloc_end = alloc_start + sectorsize;
+ goto reserve_space;
+ }
+
+ alloc_start = round_up(offset, sectorsize);
+ alloc_end = round_down(offset + len, sectorsize);
+
+ /*
+ * For unaligned ranges, check the pages at the boundaries, they might
+ * map to an extent, in which case we need to partially zero them, or
+ * they might map to a hole, in which case we need our allocation range
+ * to cover them.
+ */
+ if (!IS_ALIGNED(offset, sectorsize)) {
+ ret = btrfs_zero_range_check_range_boundary(inode, offset);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ alloc_start = round_down(offset, sectorsize);
+ ret = 0;
+ } else {
+ ret = btrfs_truncate_block(inode, offset, 0, 0);
+ if (ret)
+ goto out;
+ }
+ }
+
+ if (!IS_ALIGNED(offset + len, sectorsize)) {
+ ret = btrfs_zero_range_check_range_boundary(inode,
+ offset + len);
+ if (ret < 0)
+ goto out;
+ if (ret) {
+ alloc_end = round_up(offset + len, sectorsize);
+ ret = 0;
+ } else {
+ ret = btrfs_truncate_block(inode, offset + len, 0, 1);
+ if (ret)
+ goto out;
+ }
+ }
+
+reserve_space:
+ if (alloc_start < alloc_end) {
+ struct extent_state *cached_state = NULL;
+ const u64 lockstart = alloc_start;
+ const u64 lockend = alloc_end - 1;
+
+ bytes_to_reserve = alloc_end - alloc_start;
+ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
+ bytes_to_reserve);
+ if (ret < 0)
+ goto out;
+ space_reserved = true;
+ ret = btrfs_qgroup_reserve_data(inode, &data_reserved,
+ alloc_start, bytes_to_reserve);
+ if (ret)
+ goto out;
+ ret = btrfs_punch_hole_lock_range(inode, lockstart, lockend,
+ &cached_state);
+ if (ret)
+ goto out;
+ ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
+ alloc_end - alloc_start,
+ i_blocksize(inode),
+ offset + len, &alloc_hint);
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state, GFP_KERNEL);
+ /* btrfs_prealloc_file_range releases reserved space on error */
+ if (ret)
+ space_reserved = false;
+ }
+ out:
+ if (ret && space_reserved)
+ btrfs_free_reserved_data_space(inode, data_reserved,
+ alloc_start, bytes_to_reserve);
+ extent_changeset_free(data_reserved);
+
+ return ret;
+}
+
static long btrfs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
@@ -2831,7 +3064,8 @@ static long btrfs_fallocate(struct file *file, int mode,
cur_offset = alloc_start;
/* Make sure we aren't being give some crap mode */
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
if (mode & FALLOC_FL_PUNCH_HOLE)
@@ -2842,10 +3076,12 @@ static long btrfs_fallocate(struct file *file, int mode,
*
* For qgroup space, it will be checked later.
*/
- ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
- alloc_end - alloc_start);
- if (ret < 0)
- return ret;
+ if (!(mode & FALLOC_FL_ZERO_RANGE)) {
+ ret = btrfs_alloc_data_chunk_ondemand(BTRFS_I(inode),
+ alloc_end - alloc_start);
+ if (ret < 0)
+ return ret;
+ }
inode_lock(inode);
@@ -2887,6 +3123,12 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ if (mode & FALLOC_FL_ZERO_RANGE) {
+ ret = btrfs_zero_range(inode, offset, len, mode);
+ inode_unlock(inode);
+ return ret;
+ }
+
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
@@ -2896,8 +3138,8 @@ static long btrfs_fallocate(struct file *file, int mode,
*/
lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
locked_end, &cached_state);
- ordered = btrfs_lookup_first_ordered_extent(inode,
- alloc_end - 1);
+ ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
+
if (ordered &&
ordered->file_offset + ordered->len > alloc_start &&
ordered->file_offset < alloc_end) {
@@ -2922,7 +3164,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
- while (1) {
+ while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
if (IS_ERR(em)) {
@@ -2958,8 +3200,6 @@ static long btrfs_fallocate(struct file *file, int mode,
}
free_extent_map(em);
cur_offset = last_byte;
- if (cur_offset >= alloc_end)
- break;
}
/*
@@ -2982,37 +3222,18 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret < 0)
goto out_unlock;
- if (actual_end > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /*
- * We didn't need to allocate any more space, but we
- * still extended the size of the file so we need to
- * update i_size and the inode item.
- */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- } else {
- inode->i_ctime = current_time(inode);
- i_size_write(inode, actual_end);
- btrfs_ordered_update_i_size(inode, actual_end, NULL);
- ret = btrfs_update_inode(trans, root, inode);
- if (ret)
- btrfs_end_transaction(trans);
- else
- ret = btrfs_end_transaction(trans);
- }
- }
+ /*
+ * We didn't need to allocate any more space, but we still extended the
+ * size of the file so we need to update i_size and the inode item.
+ */
+ ret = btrfs_fallocate_update_isize(inode, actual_end, mode);
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_KERNEL);
out:
inode_unlock(inode);
/* Let go of our reservation. */
- if (ret != 0)
+ if (ret != 0 && !(mode & FALLOC_FL_ZERO_RANGE))
btrfs_free_reserved_data_space(inode, data_reserved,
alloc_start, alloc_end - cur_offset);
extent_changeset_free(data_reserved);
@@ -3145,7 +3366,7 @@ void btrfs_auto_defrag_exit(void)
kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
-int btrfs_auto_defrag_init(void)
+int __init btrfs_auto_defrag_init(void)
{
btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag",
sizeof(struct inode_defrag), 0,
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4426d1c73e50..b8ab90c9a9fb 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -993,8 +993,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
- GFP_NOFS);
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
goto fail;
}
leaf = path->nodes[0];
@@ -1008,7 +1007,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
inode->i_size - 1,
EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
- NULL, GFP_NOFS);
+ NULL);
btrfs_release_path(path);
goto fail;
}
@@ -1105,8 +1104,7 @@ static int flush_dirty_cache(struct inode *inode)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
- EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
- GFP_NOFS);
+ EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
return ret;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 993061f83067..57785eadb95c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -769,7 +769,6 @@ retry:
inode, async_extent->start,
async_extent->start +
async_extent->ram_size - 1,
- btrfs_get_extent,
WB_SYNC_ALL);
else if (ret)
unlock_page(async_cow->locked_page);
@@ -1203,7 +1202,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
u64 cur_end;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
- 1, 0, NULL, GFP_NOFS);
+ 1, 0, NULL);
while (start < end) {
async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);
BUG_ON(!async_cow); /* -ENOMEM */
@@ -1951,7 +1950,21 @@ static blk_status_t __btrfs_submit_bio_done(void *private_data, struct bio *bio,
/*
* extent_io.c submission hook. This does the right thing for csum calculation
- * on write, or reading the csums from the tree before a read
+ * on write, or reading the csums from the tree before a read.
+ *
+ * Rules about async/sync submit,
+ * a) read: sync submit
+ *
+ * b) write without checksum: sync submit
+ *
+ * c) write with checksum:
+ * c-1) if bio is issued by fsync: sync submit
+ * (sync_writers != 0)
+ *
+ * c-2) if root is reloc root: sync submit
+ * (only in case of buffered IO)
+ *
+ * c-3) otherwise: async submit
*/
static blk_status_t btrfs_submit_bio_hook(void *private_data, struct bio *bio,
int mirror_num, unsigned long bio_flags,
@@ -2023,10 +2036,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sum;
list_for_each_entry(sum, list, list) {
- trans->adding_csums = 1;
+ trans->adding_csums = true;
btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
- trans->adding_csums = 0;
+ trans->adding_csums = false;
}
return 0;
}
@@ -2986,7 +2999,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
clear_extent_bit(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
- EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
+ EXTENT_DEFRAG, 0, 0, &cached_state);
}
if (nolock)
@@ -3005,6 +3018,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
compress_type = ordered_extent->compress_type;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compress_type);
+ btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
+ ordered_extent->len);
ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
ordered_extent->file_offset,
ordered_extent->file_offset +
@@ -3054,7 +3069,7 @@ out:
ordered_extent->len - 1,
clear_bits,
(clear_bits & EXTENT_LOCKED) ? 1 : 0,
- 0, &cached_state, GFP_NOFS);
+ 0, &cached_state);
}
if (trans)
@@ -3068,7 +3083,7 @@ out:
else
start = ordered_extent->file_offset;
end = ordered_extent->file_offset + ordered_extent->len - 1;
- clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
+ clear_extent_uptodate(io_tree, start, end, NULL);
/* Drop the cache for the part of the extent we didn't write. */
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
@@ -4796,7 +4811,7 @@ again:
clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state, GFP_NOFS);
+ 0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, block_start, block_end, 0,
&cached_state, 0);
@@ -5232,8 +5247,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
clear_extent_bit(io_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 1,
- &cached_state, GFP_NOFS);
+ EXTENT_DEFRAG, 1, 1, &cached_state);
cond_resched();
spin_lock(&io_tree->lock);
@@ -5892,7 +5906,6 @@ static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_file_private *private = file->private_data;
struct btrfs_dir_item *di;
@@ -5960,9 +5973,6 @@ again:
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
- if (verify_dir_item(fs_info, leaf, slot, di))
- goto next;
-
name_len = btrfs_dir_name_len(leaf, di);
if ((total_len + sizeof(struct dir_entry) + name_len) >=
PAGE_SIZE) {
@@ -6558,7 +6568,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
out_unlock:
btrfs_end_transaction(trans);
- btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
if (drop_inode) {
inode_dec_link_count(inode);
@@ -6639,7 +6648,6 @@ out_unlock:
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return err;
@@ -6714,7 +6722,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
}
- btrfs_balance_delayed_items(fs_info);
fail:
if (trans)
btrfs_end_transaction(trans);
@@ -6792,7 +6799,6 @@ out_fail:
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return err;
@@ -6940,7 +6946,6 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct btrfs_trans_handle *trans = NULL;
const bool new_inline = !page || create;
-again:
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (em)
@@ -7081,7 +7086,7 @@ next:
em->orig_block_len = em->len;
em->orig_start = em->start;
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
- if (create == 0 && !PageUptodate(page)) {
+ if (!PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
BTRFS_COMPRESS_NONE) {
ret = uncompress_inline(path, page, pg_offset,
@@ -7102,25 +7107,6 @@ next:
kunmap(page);
}
flush_dcache_page(page);
- } else if (create && PageUptodate(page)) {
- BUG();
- if (!trans) {
- kunmap(page);
- free_extent_map(em);
- em = NULL;
-
- btrfs_release_path(path);
- trans = btrfs_join_transaction(root);
-
- if (IS_ERR(trans))
- return ERR_CAST(trans);
- goto again;
- }
- map = kmap(page);
- write_extent_buffer(leaf, map + pg_offset, ptr,
- copy_size);
- kunmap(page);
- btrfs_mark_buffer_dirty(leaf);
}
set_extent_uptodate(io_tree, em->start,
extent_map_end(em) - 1, NULL, GFP_NOFS);
@@ -7132,7 +7118,6 @@ not_found:
em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
- set_bit(EXTENT_FLAG_VACANCY, &em->flags);
insert:
btrfs_release_path(path);
if (em->start > start || extent_map_end(em) <= start) {
@@ -7924,7 +7909,7 @@ unlock:
if (lockstart < lockend) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, unlock_bits, 1, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
} else {
free_extent_state(cached_state);
}
@@ -7935,7 +7920,7 @@ unlock:
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
- unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+ unlock_bits, 1, 0, &cached_state);
err:
if (dio_data)
current->journal_info = dio_data;
@@ -8458,6 +8443,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
bool write = bio_op(bio) == REQ_OP_WRITE;
blk_status_t ret;
+ /* Check btrfs_submit_bio_hook() for rules about async submit. */
if (async_submit)
async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
@@ -8852,7 +8838,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
+ return extent_fiemap(inode, fieinfo, start, len);
}
int btrfs_readpage(struct file *file, struct page *page)
@@ -8884,7 +8870,7 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
return AOP_WRITEPAGE_ACTIVATE;
}
tree = &BTRFS_I(page->mapping->host)->io_tree;
- ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+ ret = extent_write_full_page(tree, page, wbc);
btrfs_add_delayed_iput(inode);
return ret;
}
@@ -8895,7 +8881,7 @@ static int btrfs_writepages(struct address_space *mapping,
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
+ return extent_writepages(tree, mapping, wbc);
}
static int
@@ -8904,8 +8890,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
{
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;
- return extent_readpages(tree, mapping, pages, nr_pages,
- btrfs_get_extent);
+ return extent_readpages(tree, mapping, pages, nr_pages);
}
static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{
@@ -8976,8 +8961,7 @@ again:
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 0, &cached_state,
- GFP_NOFS);
+ EXTENT_DEFRAG, 1, 0, &cached_state);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
@@ -9034,7 +9018,7 @@ again:
EXTENT_LOCKED | EXTENT_DIRTY |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
- &cached_state, GFP_NOFS);
+ &cached_state);
__btrfs_releasepage(page, GFP_NOFS);
}
@@ -9162,7 +9146,7 @@ again:
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
- 0, 0, &cached_state, GFP_NOFS);
+ 0, 0, &cached_state);
ret = btrfs_set_extent_delalloc(inode, page_start, end, 0,
&cached_state, 0);
@@ -9419,7 +9403,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
struct btrfs_inode *ei;
struct inode *inode;
- ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
+ ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
@@ -9571,7 +9555,7 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_free_space_cachep);
}
-int btrfs_init_cachep(void)
+int __init btrfs_init_cachep(void)
{
btrfs_inode_cachep = kmem_cache_create("btrfs_inode",
sizeof(struct btrfs_inode), 0,
@@ -10686,7 +10670,6 @@ out:
btrfs_end_transaction(trans);
if (ret)
iput(inode);
- btrfs_balance_delayed_items(fs_info);
btrfs_btree_balance_dirty(fs_info);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d748ad1c3620..2f7562b1e349 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1190,7 +1190,7 @@ again:
clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
- &cached_state, GFP_NOFS);
+ &cached_state);
if (i_done != page_cnt) {
spin_lock(&BTRFS_I(inode)->lock);
@@ -1760,6 +1760,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
struct btrfs_trans_handle *trans;
u64 root_flags;
u64 flags;
+ bool clear_received_uuid = false;
int ret = 0;
if (!inode_owner_or_capable(inode))
@@ -1809,6 +1810,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
btrfs_set_root_flags(&root->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
spin_unlock(&root->root_item_lock);
+ clear_received_uuid = true;
} else {
spin_unlock(&root->root_item_lock);
btrfs_warn(fs_info,
@@ -1825,6 +1827,24 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
goto out_reset;
}
+ if (clear_received_uuid) {
+ if (!btrfs_is_empty_uuid(root->root_item.received_uuid)) {
+ ret = btrfs_uuid_tree_rem(trans, fs_info,
+ root->root_item.received_uuid,
+ BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+ root->root_key.objectid);
+
+ if (ret && ret != -ENOENT) {
+ btrfs_abort_transaction(trans, ret);
+ btrfs_end_transaction(trans);
+ goto out_reset;
+ }
+
+ memset(root->root_item.received_uuid, 0,
+ BTRFS_UUID_SIZE);
+ }
+ }
+
ret = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key, &root->root_item);
if (ret < 0) {
@@ -2675,14 +2695,12 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
goto out;
}
- mutex_lock(&fs_info->volume_mutex);
if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
} else {
vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
}
- mutex_unlock(&fs_info->volume_mutex);
clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
if (!ret) {
@@ -2726,9 +2744,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
}
vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
- mutex_lock(&fs_info->volume_mutex);
ret = btrfs_rm_device(fs_info, vol_args->name, 0);
- mutex_unlock(&fs_info->volume_mutex);
if (!ret)
btrfs_info(fs_info, "disk deleted %s", vol_args->name);
@@ -2753,16 +2769,16 @@ static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
if (!fi_args)
return -ENOMEM;
- mutex_lock(&fs_devices->device_list_mutex);
+ rcu_read_lock();
fi_args->num_devices = fs_devices->num_devices;
- memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
- list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
if (device->devid > fi_args->max_id)
fi_args->max_id = device->devid;
}
- mutex_unlock(&fs_devices->device_list_mutex);
+ rcu_read_unlock();
+ memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid));
fi_args->nodesize = fs_info->nodesize;
fi_args->sectorsize = fs_info->sectorsize;
fi_args->clone_alignment = fs_info->sectorsize;
@@ -2779,7 +2795,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
{
struct btrfs_ioctl_dev_info_args *di_args;
struct btrfs_device *dev;
- struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
int ret = 0;
char *s_uuid = NULL;
@@ -2790,7 +2805,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (!btrfs_is_empty_uuid(di_args->uuid))
s_uuid = di_args->uuid;
- mutex_lock(&fs_devices->device_list_mutex);
+ rcu_read_lock();
dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL);
if (!dev) {
@@ -2805,17 +2820,15 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
if (dev->name) {
struct rcu_string *name;
- rcu_read_lock();
name = rcu_dereference(dev->name);
strncpy(di_args->path, name->str, sizeof(di_args->path));
- rcu_read_unlock();
di_args->path[sizeof(di_args->path) - 1] = 0;
} else {
di_args->path[0] = '\0';
}
out:
- mutex_unlock(&fs_devices->device_list_mutex);
+ rcu_read_unlock();
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
ret = -EFAULT;
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f6a05f836629..4f23ff00c8b3 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -164,7 +164,6 @@ static int iterate_object_props(struct btrfs_root *root,
size_t),
void *ctx)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
char *name_buf = NULL;
char *value_buf = NULL;
@@ -215,12 +214,6 @@ static int iterate_object_props(struct btrfs_root *root,
name_ptr = (unsigned long)(di + 1);
data_ptr = name_ptr + name_len;
- if (verify_dir_item(fs_info, leaf,
- path->slots[0], di)) {
- ret = -EIO;
- goto out;
- }
-
if (name_len <= XATTR_BTRFS_PREFIX_LEN ||
memcmp_extent_buffer(leaf, XATTR_BTRFS_PREFIX,
name_ptr,
@@ -414,7 +407,7 @@ static int prop_compression_apply(struct inode *inode,
type = BTRFS_COMPRESS_LZO;
else if (!strncmp("zlib", value, 4))
type = BTRFS_COMPRESS_ZLIB;
- else if (!strncmp("zstd", value, len))
+ else if (!strncmp("zstd", value, 4))
type = BTRFS_COMPRESS_ZSTD;
else
return -EINVAL;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 168fd03ca3ac..c02670d5aa13 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -47,6 +47,80 @@
* - check all ioctl parameters
*/
+/*
+ * Helpers to access qgroup reservation
+ *
+ * Callers should ensure the lock context and type are valid
+ */
+
+static u64 qgroup_rsv_total(const struct btrfs_qgroup *qgroup)
+{
+ u64 ret = 0;
+ int i;
+
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ ret += qgroup->rsv.values[i];
+
+ return ret;
+}
+
+#ifdef CONFIG_BTRFS_DEBUG
+static const char *qgroup_rsv_type_str(enum btrfs_qgroup_rsv_type type)
+{
+ if (type == BTRFS_QGROUP_RSV_DATA)
+ return "data";
+ if (type == BTRFS_QGROUP_RSV_META)
+ return "meta";
+ return NULL;
+}
+#endif
+
+static void qgroup_rsv_add(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ trace_qgroup_update_reserve(fs_info, qgroup, num_bytes, type);
+ qgroup->rsv.values[type] += num_bytes;
+}
+
+static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
+{
+ trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes, type);
+ if (qgroup->rsv.values[type] >= num_bytes) {
+ qgroup->rsv.values[type] -= num_bytes;
+ return;
+ }
+#ifdef CONFIG_BTRFS_DEBUG
+ WARN_RATELIMIT(1,
+ "qgroup %llu %s reserved space underflow, have %llu to free %llu",
+ qgroup->qgroupid, qgroup_rsv_type_str(type),
+ qgroup->rsv.values[type], num_bytes);
+#endif
+ qgroup->rsv.values[type] = 0;
+}
+
+static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *dest,
+ struct btrfs_qgroup *src)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ qgroup_rsv_add(fs_info, dest, src->rsv.values[i], i);
+}
+
+static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *dest,
+ struct btrfs_qgroup *src)
+{
+ int i;
+
+ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++)
+ qgroup_rsv_release(fs_info, dest, src->rsv.values[i], i);
+}
+
static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
int mod)
{
@@ -991,33 +1065,29 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
-static void report_reserved_underflow(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup *qgroup,
- u64 num_bytes)
-{
-#ifdef CONFIG_BTRFS_DEBUG
- WARN_ON(qgroup->reserved < num_bytes);
- btrfs_debug(fs_info,
- "qgroup %llu reserved space underflow, have: %llu, to free: %llu",
- qgroup->qgroupid, qgroup->reserved, num_bytes);
-#endif
- qgroup->reserved = 0;
-}
/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their reference and
- * exclusive counts adjusted.
+ * The easy accounting, we're updating qgroup relationship whose child qgroup
+ * only has exclusive extents.
+ *
+ * In this case, all exclsuive extents will also be exlusive for parent, so
+ * excl/rfer just get added/removed.
+ *
+ * So is qgroup reservation space, which should also be added/removed to
+ * parent.
+ * Or when child tries to release reservation space, parent will underflow its
+ * reservation (for relationship adding case).
*
* Caller should hold fs_info->qgroup_lock.
*/
static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
struct ulist *tmp, u64 ref_root,
- u64 num_bytes, int sign)
+ struct btrfs_qgroup *src, int sign)
{
struct btrfs_qgroup *qgroup;
struct btrfs_qgroup_list *glist;
struct ulist_node *unode;
struct ulist_iterator uiter;
+ u64 num_bytes = src->excl;
int ret = 0;
qgroup = find_qgroup_rb(fs_info, ref_root);
@@ -1030,13 +1100,11 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
qgroup->excl_cmpr += sign * num_bytes;
- if (sign > 0) {
- trace_qgroup_update_reserve(fs_info, qgroup, -(s64)num_bytes);
- if (qgroup->reserved < num_bytes)
- report_reserved_underflow(fs_info, qgroup, num_bytes);
- else
- qgroup->reserved -= num_bytes;
- }
+
+ if (sign > 0)
+ qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+ else
+ qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup_dirty(fs_info, qgroup);
@@ -1056,15 +1124,10 @@ static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
qgroup->rfer_cmpr += sign * num_bytes;
WARN_ON(sign < 0 && qgroup->excl < num_bytes);
qgroup->excl += sign * num_bytes;
- if (sign > 0) {
- trace_qgroup_update_reserve(fs_info, qgroup,
- -(s64)num_bytes);
- if (qgroup->reserved < num_bytes)
- report_reserved_underflow(fs_info, qgroup,
- num_bytes);
- else
- qgroup->reserved -= num_bytes;
- }
+ if (sign > 0)
+ qgroup_rsv_add_by_qgroup(fs_info, qgroup, src);
+ else
+ qgroup_rsv_release_by_qgroup(fs_info, qgroup, src);
qgroup->excl_cmpr += sign * num_bytes;
qgroup_dirty(fs_info, qgroup);
@@ -1107,7 +1170,7 @@ static int quick_update_accounting(struct btrfs_fs_info *fs_info,
if (qgroup->excl == qgroup->rfer) {
ret = 0;
err = __qgroup_excl_accounting(fs_info, tmp, dst,
- qgroup->excl, sign);
+ qgroup, sign);
if (err < 0) {
ret = err;
goto out;
@@ -2333,17 +2396,18 @@ out:
static bool qgroup_check_limits(const struct btrfs_qgroup *qg, u64 num_bytes)
{
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + (s64)qg->rfer + num_bytes > qg->max_rfer)
+ qgroup_rsv_total(qg) + (s64)qg->rfer + num_bytes > qg->max_rfer)
return false;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + (s64)qg->excl + num_bytes > qg->max_excl)
+ qgroup_rsv_total(qg) + (s64)qg->excl + num_bytes > qg->max_excl)
return false;
return true;
}
-static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce)
+static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
+ enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -2395,7 +2459,7 @@ retry:
* Commit the tree and retry, since we may have
* deletions which would free up space.
*/
- if (!retried && qg->reserved > 0) {
+ if (!retried && qgroup_rsv_total(qg) > 0) {
struct btrfs_trans_handle *trans;
spin_unlock(&fs_info->qgroup_lock);
@@ -2434,8 +2498,8 @@ retry:
qg = unode_aux_to_qgroup(unode);
- trace_qgroup_update_reserve(fs_info, qg, num_bytes);
- qg->reserved += num_bytes;
+ trace_qgroup_update_reserve(fs_info, qg, num_bytes, type);
+ qgroup_rsv_add(fs_info, qg, num_bytes, type);
}
out:
@@ -2444,7 +2508,8 @@ out:
}
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
- u64 ref_root, u64 num_bytes)
+ u64 ref_root, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type)
{
struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
@@ -2480,11 +2545,8 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
qg = unode_aux_to_qgroup(unode);
- trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes);
- if (qg->reserved < num_bytes)
- report_reserved_underflow(fs_info, qg, num_bytes);
- else
- qg->reserved -= num_bytes;
+ trace_qgroup_update_reserve(fs_info, qg, -(s64)num_bytes, type);
+ qgroup_rsv_release(fs_info, qg, num_bytes, type);
list_for_each_entry(glist, &qg->groups, next_group) {
ret = ulist_add(fs_info->qgroup_ulist,
@@ -2872,7 +2934,7 @@ int btrfs_qgroup_reserve_data(struct inode *inode,
to_reserve, QGROUP_RESERVE);
if (ret < 0)
goto cleanup;
- ret = qgroup_reserve(root, to_reserve, true);
+ ret = qgroup_reserve(root, to_reserve, true, BTRFS_QGROUP_RSV_DATA);
if (ret < 0)
goto cleanup;
@@ -2883,8 +2945,7 @@ cleanup:
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(&reserved->range_changed, &uiter)))
clear_extent_bit(&BTRFS_I(inode)->io_tree, unode->val,
- unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL,
- GFP_NOFS);
+ unode->aux, EXTENT_QGROUP_RESERVED, 0, 0, NULL);
extent_changeset_release(reserved);
return ret;
}
@@ -2936,7 +2997,8 @@ static int qgroup_free_reserved_data(struct inode *inode,
goto out;
freed += changeset.bytes_changed;
}
- btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed);
+ btrfs_qgroup_free_refroot(root->fs_info, root->objectid, freed,
+ BTRFS_QGROUP_RSV_DATA);
ret = freed;
out:
extent_changeset_release(&changeset);
@@ -2968,7 +3030,7 @@ static int __btrfs_qgroup_release_data(struct inode *inode,
if (free)
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
- changeset.bytes_changed);
+ changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
ret = changeset.bytes_changed;
out:
extent_changeset_release(&changeset);
@@ -3025,7 +3087,7 @@ int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
BUG_ON(num_bytes != round_down(num_bytes, fs_info->nodesize));
trace_qgroup_meta_reserve(root, (s64)num_bytes);
- ret = qgroup_reserve(root, num_bytes, enforce);
+ ret = qgroup_reserve(root, num_bytes, enforce, BTRFS_QGROUP_RSV_META);
if (ret < 0)
return ret;
atomic64_add(num_bytes, &root->qgroup_meta_rsv);
@@ -3045,7 +3107,8 @@ void btrfs_qgroup_free_meta_all(struct btrfs_root *root)
if (reserved == 0)
return;
trace_qgroup_meta_reserve(root, -(s64)reserved);
- btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, reserved,
+ BTRFS_QGROUP_RSV_META);
}
void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
@@ -3060,7 +3123,8 @@ void btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes)
WARN_ON(atomic64_read(&root->qgroup_meta_rsv) < num_bytes);
atomic64_sub(num_bytes, &root->qgroup_meta_rsv);
trace_qgroup_meta_reserve(root, -(s64)num_bytes);
- btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, root->objectid, num_bytes,
+ BTRFS_QGROUP_RSV_META);
}
/*
@@ -3088,7 +3152,7 @@ void btrfs_qgroup_check_reserved_leak(struct inode *inode)
}
btrfs_qgroup_free_refroot(BTRFS_I(inode)->root->fs_info,
BTRFS_I(inode)->root->objectid,
- changeset.bytes_changed);
+ changeset.bytes_changed, BTRFS_QGROUP_RSV_DATA);
}
extent_changeset_release(&changeset);
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index d9984e87cddf..c8c81b923674 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -61,6 +61,26 @@ struct btrfs_qgroup_extent_record {
struct ulist *old_roots;
};
+enum btrfs_qgroup_rsv_type {
+ BTRFS_QGROUP_RSV_DATA = 0,
+ BTRFS_QGROUP_RSV_META,
+ BTRFS_QGROUP_RSV_LAST,
+};
+
+/*
+ * Represents how many bytes we have reserved for this qgroup.
+ *
+ * Each type should have different reservation behavior.
+ * E.g, data follows its io_tree flag modification, while
+ * *currently* meta is just reserve-and-clear during transcation.
+ *
+ * TODO: Add new type for reservation which can survive transaction commit.
+ * Currect metadata reservation behavior is not suitable for such case.
+ */
+struct btrfs_qgroup_rsv {
+ u64 values[BTRFS_QGROUP_RSV_LAST];
+};
+
/*
* one struct for each qgroup, organized in fs_info->qgroup_tree.
*/
@@ -87,7 +107,7 @@ struct btrfs_qgroup {
/*
* reservation tracking
*/
- u64 reserved;
+ struct btrfs_qgroup_rsv rsv;
/*
* lists
@@ -228,12 +248,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
- u64 ref_root, u64 num_bytes);
+ u64 ref_root, u64 num_bytes,
+ enum btrfs_qgroup_rsv_type type);
static inline void btrfs_qgroup_free_delayed_ref(struct btrfs_fs_info *fs_info,
u64 ref_root, u64 num_bytes)
{
trace_btrfs_qgroup_free_delayed_ref(fs_info, ref_root, num_bytes);
- btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes);
+ btrfs_qgroup_free_refroot(fs_info, ref_root, num_bytes,
+ BTRFS_QGROUP_RSV_DATA);
}
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 34878699d363..171f3cce30e6 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -606,8 +606,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
}
/* Walk up to the next node that needs to be processed */
-static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
- int *level)
+static int walk_up_tree(struct btrfs_path *path, int *level)
{
int l;
@@ -984,7 +983,6 @@ void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start,
int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
{
struct btrfs_path *path;
- struct btrfs_root *root;
struct extent_buffer *eb;
u64 bytenr = 0, num_bytes = 0;
int ret, level;
@@ -1014,7 +1012,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
&bytenr, &num_bytes);
if (ret)
break;
- ret = walk_up_tree(root, path, &level);
+ ret = walk_up_tree(path, &level);
if (ret < 0)
break;
if (ret > 0) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 3338407ef0f0..aab0194efe46 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -387,13 +387,6 @@ again:
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
- ret = btrfs_is_name_len_valid(leaf, path->slots[0], ptr,
- name_len);
- if (!ret) {
- err = -EIO;
- goto out;
- }
-
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
*sequence = btrfs_root_ref_sequence(leaf, ref);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 20d3300bd268..f306c608dc28 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1059,12 +1059,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
}
}
- ret = btrfs_is_name_len_valid(eb, path->slots[0],
- (unsigned long)(di + 1), name_len + data_len);
- if (!ret) {
- ret = -EIO;
- goto out;
- }
if (name_len + data_len > buf_len) {
buf_len = name_len + data_len;
if (is_vmalloc_addr(buf)) {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3a4dce153645..138ff32c39b9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -67,6 +67,7 @@
static const struct super_operations btrfs_super_ops;
static struct file_system_type btrfs_fs_type;
+static struct file_system_type btrfs_root_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
@@ -454,7 +455,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
case Opt_subvolrootid:
case Opt_device:
/*
- * These are parsed by btrfs_parse_early_options
+ * These are parsed by btrfs_parse_subvol_options
+ * and btrfs_parse_early_options
* and can be happily ignored here.
*/
break;
@@ -531,9 +533,10 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
btrfs_clear_opt(info->mount_opt, NODATASUM);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
no_compress = 0;
- } else if (strcmp(args[0].from, "zstd") == 0) {
+ } else if (strncmp(args[0].from, "zstd", 4) == 0) {
compress_type = "zstd";
info->compress_type = BTRFS_COMPRESS_ZSTD;
+ info->compress_level = btrfs_compress_str2level(args[0].from);
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, NODATACOW);
btrfs_clear_opt(info->mount_opt, NODATASUM);
@@ -888,11 +891,63 @@ out:
* only when we need to allocate a new super block.
*/
static int btrfs_parse_early_options(const char *options, fmode_t flags,
- void *holder, char **subvol_name, u64 *subvol_objectid,
- struct btrfs_fs_devices **fs_devices)
+ void *holder, struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
+ int error = 0;
+
+ if (!options)
+ return 0;
+
+ /*
+ * strsep changes the string, duplicate it because btrfs_parse_options
+ * gets called later
+ */
+ opts = kstrdup(options, GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+ orig = opts;
+
+ while ((p = strsep(&opts, ",")) != NULL) {
+ int token;
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_device:
+ device_name = match_strdup(&args[0]);
+ if (!device_name) {
+ error = -ENOMEM;
+ goto out;
+ }
+ error = btrfs_scan_one_device(device_name,
+ flags, holder, fs_devices);
+ kfree(device_name);
+ if (error)
+ goto out;
+ break;
+ default:
+ break;
+ }
+ }
+
+out:
+ kfree(orig);
+ return error;
+}
+
+/*
+ * Parse mount options that are related to subvolume id
+ *
+ * The value is later passed to mount_subvol()
+ */
+static int btrfs_parse_subvol_options(const char *options, fmode_t flags,
+ void *holder, char **subvol_name, u64 *subvol_objectid)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *opts, *orig, *p;
char *num = NULL;
int error = 0;
@@ -900,8 +955,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
return 0;
/*
- * strsep changes the string, duplicate it because parse_options
- * gets called twice
+ * strsep changes the string, duplicate it because
+ * btrfs_parse_early_options gets called later
*/
opts = kstrdup(options, GFP_KERNEL);
if (!opts)
@@ -940,18 +995,6 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
case Opt_subvolrootid:
pr_warn("BTRFS: 'subvolrootid' mount option is deprecated and has no effect\n");
break;
- case Opt_device:
- device_name = match_strdup(&args[0]);
- if (!device_name) {
- error = -ENOMEM;
- goto out;
- }
- error = btrfs_scan_one_device(device_name,
- flags, holder, fs_devices);
- kfree(device_name);
- if (error)
- goto out;
- break;
default:
break;
}
@@ -1365,86 +1408,13 @@ static inline int is_subvolume_inode(struct inode *inode)
return 0;
}
-/*
- * This will add subvolid=0 to the argument string while removing any subvol=
- * and subvolid= arguments to make sure we get the top-level root for path
- * walking to the subvol we want.
- */
-static char *setup_root_args(char *args)
-{
- char *buf, *dst, *sep;
-
- if (!args)
- return kstrdup("subvolid=0", GFP_KERNEL);
-
- /* The worst case is that we add ",subvolid=0" to the end. */
- buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1,
- GFP_KERNEL);
- if (!buf)
- return NULL;
-
- while (1) {
- sep = strchrnul(args, ',');
- if (!strstarts(args, "subvol=") &&
- !strstarts(args, "subvolid=")) {
- memcpy(dst, args, sep - args);
- dst += sep - args;
- *dst++ = ',';
- }
- if (*sep)
- args = sep + 1;
- else
- break;
- }
- strcpy(dst, "subvolid=0");
-
- return buf;
-}
-
static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
int flags, const char *device_name,
- char *data)
+ char *data, struct vfsmount *mnt)
{
struct dentry *root;
- struct vfsmount *mnt = NULL;
- char *newargs;
int ret;
- newargs = setup_root_args(data);
- if (!newargs) {
- root = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
- if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
- if (flags & SB_RDONLY) {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~SB_RDONLY,
- device_name, newargs);
- } else {
- mnt = vfs_kern_mount(&btrfs_fs_type, flags | SB_RDONLY,
- device_name, newargs);
- if (IS_ERR(mnt)) {
- root = ERR_CAST(mnt);
- mnt = NULL;
- goto out;
- }
-
- down_write(&mnt->mnt_sb->s_umount);
- ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
- up_write(&mnt->mnt_sb->s_umount);
- if (ret < 0) {
- root = ERR_PTR(ret);
- goto out;
- }
- }
- }
- if (IS_ERR(mnt)) {
- root = ERR_CAST(mnt);
- mnt = NULL;
- goto out;
- }
-
if (!subvol_name) {
if (!subvol_objectid) {
ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
@@ -1500,7 +1470,6 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
out:
mntput(mnt);
- kfree(newargs);
kfree(subvol_name);
return root;
}
@@ -1558,10 +1527,10 @@ static int setup_security_options(struct btrfs_fs_info *fs_info,
/*
* Find a superblock for the given device / mount point.
*
- * Note: This is based on get_sb_bdev from fs/super.c with a few additions
+ * Note: This is based on mount_bdev from fs/super.c with a few additions
* for multiple device setup. Make sure to keep it in sync.
*/
-static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+static struct dentry *mount_root(struct file_system_type *fs_type, int flags,
const char *device_name, void *data)
{
struct block_device *bdev = NULL;
@@ -1570,27 +1539,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
struct btrfs_fs_info *fs_info = NULL;
struct security_mnt_opts new_sec_opts;
fmode_t mode = FMODE_READ;
- char *subvol_name = NULL;
- u64 subvol_objectid = 0;
int error = 0;
if (!(flags & SB_RDONLY))
mode |= FMODE_WRITE;
error = btrfs_parse_early_options(data, mode, fs_type,
- &subvol_name, &subvol_objectid,
&fs_devices);
if (error) {
- kfree(subvol_name);
return ERR_PTR(error);
}
- if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
- /* mount_subvol() will free subvol_name. */
- return mount_subvol(subvol_name, subvol_objectid, flags,
- device_name, data);
- }
-
security_init_mnt_opts(&new_sec_opts);
if (data) {
error = parse_security_options(data, &new_sec_opts);
@@ -1608,7 +1567,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
* it for searching for existing supers, so this lets us do that and
* then open_ctree will properly initialize everything later.
*/
- fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
+ fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
if (!fs_info) {
error = -ENOMEM;
goto error_sec_opts;
@@ -1616,8 +1575,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
fs_info->fs_devices = fs_devices;
- fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
- fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
+ fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
+ fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_NOFS);
security_init_mnt_opts(&fs_info->security_opts);
if (!fs_info->super_copy || !fs_info->super_for_commit) {
error = -ENOMEM;
@@ -1674,6 +1633,84 @@ error_sec_opts:
return ERR_PTR(error);
}
+/*
+ * Mount function which is called by VFS layer.
+ *
+ * In order to allow mounting a subvolume directly, btrfs uses
+ * mount_subtree() which needs vfsmount* of device's root (/).
+ * This means device's root has to be mounted internally in any case.
+ *
+ * Operation flow:
+ * 1. Parse subvol id related options for later use in mount_subvol().
+ *
+ * 2. Mount device's root (/) by calling vfs_kern_mount().
+ *
+ * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the
+ * first place. In order to avoid calling btrfs_mount() again, we use
+ * different file_system_type which is not registered to VFS by
+ * register_filesystem(). As a result, mount_root() is called.
+ * The return value will be used by mount_subtree() in mount_subvol().
+ *
+ * 3. Call mount_subvol() to get the dentry of subvolume. Since there is
+ * "btrfs subvolume set-default", mount_subvol() is called always.
+ */
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+ const char *device_name, void *data)
+{
+ struct vfsmount *mnt_root;
+ struct dentry *root;
+ fmode_t mode = FMODE_READ;
+ char *subvol_name = NULL;
+ u64 subvol_objectid = 0;
+ int error = 0;
+
+ if (!(flags & SB_RDONLY))
+ mode |= FMODE_WRITE;
+
+ error = btrfs_parse_subvol_options(data, mode, fs_type,
+ &subvol_name, &subvol_objectid);
+ if (error) {
+ kfree(subvol_name);
+ return ERR_PTR(error);
+ }
+
+ /* mount device's root (/) */
+ mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data);
+ if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) {
+ if (flags & SB_RDONLY) {
+ mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags & ~SB_RDONLY,
+ device_name, data);
+ } else {
+ mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags | SB_RDONLY,
+ device_name, data);
+ if (IS_ERR(mnt_root)) {
+ root = ERR_CAST(mnt_root);
+ goto out;
+ }
+
+ down_write(&mnt_root->mnt_sb->s_umount);
+ error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL);
+ up_write(&mnt_root->mnt_sb->s_umount);
+ if (error < 0) {
+ root = ERR_PTR(error);
+ mntput(mnt_root);
+ goto out;
+ }
+ }
+ }
+ if (IS_ERR(mnt_root)) {
+ root = ERR_CAST(mnt_root);
+ goto out;
+ }
+
+ /* mount_subvol() will free subvol_name and mnt_root */
+ root = mount_subvol(subvol_name, subvol_objectid, flags,
+ device_name, data, mnt_root);
+
+out:
+ return root;
+}
+
static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
int new_pool_size, int old_pool_size)
{
@@ -2174,6 +2211,15 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
};
+
+static struct file_system_type btrfs_root_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "btrfs",
+ .mount = mount_root,
+ .kill_sb = btrfs_kill_super,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+};
+
MODULE_ALIAS_FS("btrfs");
static int btrfs_control_open(struct inode *inode, struct file *file)
@@ -2324,7 +2370,7 @@ static struct miscdevice btrfs_misc = {
MODULE_ALIAS_MISCDEV(BTRFS_MINOR);
MODULE_ALIAS("devname:btrfs-control");
-static int btrfs_interface_init(void)
+static int __init btrfs_interface_init(void)
{
return misc_register(&btrfs_misc);
}
@@ -2334,7 +2380,7 @@ static void btrfs_interface_exit(void)
misc_deregister(&btrfs_misc);
}
-static void btrfs_print_mod_info(void)
+static void __init btrfs_print_mod_info(void)
{
pr_info("Btrfs loaded, crc32c=%s"
#ifdef CONFIG_BTRFS_DEBUG
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index a28bba801264..a8bafed931f4 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -897,7 +897,7 @@ static int btrfs_init_debugfs(void)
return 0;
}
-int btrfs_init_sysfs(void)
+int __init btrfs_init_sysfs(void)
{
int ret;
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index 30affb60da51..13420cd19ef0 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -288,10 +288,6 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
test_msg("Expected a hole, got %llu\n", em->block_start);
goto out;
}
- if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
- test_msg("Vacancy flag wasn't set properly\n");
- goto out;
- }
free_extent_map(em);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
@@ -1001,8 +997,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DIRTY |
- EXTENT_UPTODATE, 0, 0,
- NULL, GFP_KERNEL);
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1070,8 +1065,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_UPTODATE, 0, 0,
- NULL, GFP_KERNEL);
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1104,8 +1098,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_UPTODATE, 0, 0,
- NULL, GFP_KERNEL);
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_msg("clear_extent_bit returned %d\n", ret);
goto out;
@@ -1121,8 +1114,7 @@ out:
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_UPTODATE, 0, 0,
- NULL, GFP_KERNEL);
+ EXTENT_UPTODATE, 0, 0, NULL);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
@@ -1134,7 +1126,6 @@ int btrfs_test_inodes(u32 sectorsize, u32 nodesize)
int ret;
set_bit(EXTENT_FLAG_COMPRESSED, &compressed_only);
- set_bit(EXTENT_FLAG_VACANCY, &vacancy_only);
set_bit(EXTENT_FLAG_PREALLOC, &prealloc_only);
test_msg("Running btrfs_get_extent tests\n");
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5a8c2649af2f..6348573e26a7 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -495,8 +495,8 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (current->journal_info) {
WARN_ON(type & TRANS_EXTWRITERS);
h = current->journal_info;
- h->use_count++;
- WARN_ON(h->use_count > 2);
+ refcount_inc(&h->use_count);
+ WARN_ON(refcount_read(&h->use_count) > 2);
h->orig_rsv = h->block_rsv;
h->block_rsv = NULL;
goto got_it;
@@ -567,7 +567,7 @@ again:
h->transid = cur_trans->transid;
h->transaction = cur_trans;
h->root = root;
- h->use_count = 1;
+ refcount_set(&h->use_count, 1);
h->fs_info = root->fs_info;
h->type = type;
@@ -837,8 +837,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int err = 0;
int must_run_delayed_refs = 0;
- if (trans->use_count > 1) {
- trans->use_count--;
+ if (refcount_read(&trans->use_count) > 1) {
+ refcount_dec(&trans->use_count);
trans->block_rsv = trans->orig_rsv;
return 0;
}
@@ -1016,8 +1016,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* it's safe to do it (through clear_btree_io_tree()).
*/
err = clear_extent_bit(dirty_pages, start, end,
- EXTENT_NEED_WAIT,
- 0, 0, &cached_state, GFP_NOFS);
+ EXTENT_NEED_WAIT, 0, 0, &cached_state);
if (err == -ENOMEM)
err = 0;
if (!err)
@@ -1869,7 +1868,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
struct btrfs_transaction *cur_trans = trans->transaction;
DEFINE_WAIT(wait);
- WARN_ON(trans->use_count > 1);
+ WARN_ON(refcount_read(&trans->use_count) > 1);
btrfs_abort_transaction(trans, err);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c55e44560103..6beee072b1bd 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -58,6 +58,7 @@ struct btrfs_transaction {
/* Be protected by fs_info->trans_lock when we want to change it. */
enum btrfs_trans_state state;
+ int aborted;
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
@@ -70,7 +71,6 @@ struct btrfs_transaction {
struct list_head dirty_bgs;
struct list_head io_bgs;
struct list_head dropped_roots;
- u64 num_dirty_bgs;
/*
* we need to make sure block group deletion doesn't race with
@@ -79,11 +79,11 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
+ unsigned int num_dirty_bgs;
/* Protected by spin lock fs_info->unused_bgs_lock. */
struct list_head deleted_bgs;
spinlock_t dropped_roots_lock;
struct btrfs_delayed_ref_root delayed_refs;
- int aborted;
struct btrfs_fs_info *fs_info;
};
@@ -111,20 +111,19 @@ struct btrfs_trans_handle {
u64 transid;
u64 bytes_reserved;
u64 chunk_bytes_reserved;
- unsigned long use_count;
- unsigned long blocks_reserved;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *orig_rsv;
+ refcount_t use_count;
+ unsigned int type;
short aborted;
- short adding_csums;
+ bool adding_csums;
bool allocating_chunk;
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
bool dirty;
- unsigned int type;
struct btrfs_root *root;
struct btrfs_fs_info *fs_info;
struct list_head new_bgs;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index ce4ed6ec8f39..66dac0a4b01f 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -30,6 +30,7 @@
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
+#include "hash.h"
/*
* Error message should follow the following format:
@@ -223,6 +224,141 @@ static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf,
}
/*
+ * Customized reported for dir_item, only important new info is key->objectid,
+ * which represents inode number
+ */
+__printf(4, 5)
+static void dir_item_err(const struct btrfs_root *root,
+ const struct extent_buffer *eb, int slot,
+ const char *fmt, ...)
+{
+ struct btrfs_key key;
+ struct va_format vaf;
+ va_list args;
+
+ btrfs_item_key_to_cpu(eb, &key, slot);
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ btrfs_crit(root->fs_info,
+ "corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
+ btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid,
+ btrfs_header_bytenr(eb), slot, key.objectid, &vaf);
+ va_end(args);
+}
+
+static int check_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_key *key, int slot)
+{
+ struct btrfs_dir_item *di;
+ u32 item_size = btrfs_item_size_nr(leaf, slot);
+ u32 cur = 0;
+
+ di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+ while (cur < item_size) {
+ char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
+ u32 name_len;
+ u32 data_len;
+ u32 max_name_len;
+ u32 total_size;
+ u32 name_hash;
+ u8 dir_type;
+
+ /* header itself should not cross item boundary */
+ if (cur + sizeof(*di) > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item header crosses item boundary, have %lu boundary %u",
+ cur + sizeof(*di), item_size);
+ return -EUCLEAN;
+ }
+
+ /* dir type check */
+ dir_type = btrfs_dir_type(leaf, di);
+ if (dir_type >= BTRFS_FT_MAX) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type, have %u expect [0, %u)",
+ dir_type, BTRFS_FT_MAX);
+ return -EUCLEAN;
+ }
+
+ if (key->type == BTRFS_XATTR_ITEM_KEY &&
+ dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "invalid dir item type for XATTR key, have %u expect %u",
+ dir_type, BTRFS_FT_XATTR);
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR &&
+ key->type != BTRFS_XATTR_ITEM_KEY) {
+ dir_item_err(root, leaf, slot,
+ "xattr dir type found for non-XATTR key");
+ return -EUCLEAN;
+ }
+ if (dir_type == BTRFS_FT_XATTR)
+ max_name_len = XATTR_NAME_MAX;
+ else
+ max_name_len = BTRFS_NAME_LEN;
+
+ /* Name/data length check */
+ name_len = btrfs_dir_name_len(leaf, di);
+ data_len = btrfs_dir_data_len(leaf, di);
+ if (name_len > max_name_len) {
+ dir_item_err(root, leaf, slot,
+ "dir item name len too long, have %u max %u",
+ name_len, max_name_len);
+ return -EUCLEAN;
+ }
+ if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+ dir_item_err(root, leaf, slot,
+ "dir item name and data len too long, have %u max %u",
+ name_len + data_len,
+ BTRFS_MAX_XATTR_SIZE(root->fs_info));
+ return -EUCLEAN;
+ }
+
+ if (data_len && dir_type != BTRFS_FT_XATTR) {
+ dir_item_err(root, leaf, slot,
+ "dir item with invalid data len, have %u expect 0",
+ data_len);
+ return -EUCLEAN;
+ }
+
+ total_size = sizeof(*di) + name_len + data_len;
+
+ /* header and name/data should not cross item boundary */
+ if (cur + total_size > item_size) {
+ dir_item_err(root, leaf, slot,
+ "dir item data crosses item boundary, have %u boundary %u",
+ cur + total_size, item_size);
+ return -EUCLEAN;
+ }
+
+ /*
+ * Special check for XATTR/DIR_ITEM, as key->offset is name
+ * hash, should match its name
+ */
+ if (key->type == BTRFS_DIR_ITEM_KEY ||
+ key->type == BTRFS_XATTR_ITEM_KEY) {
+ read_extent_buffer(leaf, namebuf,
+ (unsigned long)(di + 1), name_len);
+ name_hash = btrfs_name_hash(namebuf, name_len);
+ if (key->offset != name_hash) {
+ dir_item_err(root, leaf, slot,
+ "name hash mismatch with key, have 0x%016x expect 0x%016llx",
+ name_hash, key->offset);
+ return -EUCLEAN;
+ }
+ }
+ cur += total_size;
+ di = (struct btrfs_dir_item *)((void *)di + total_size);
+ }
+ return 0;
+}
+
+/*
* Common point to switch the item-specific validation.
*/
static int check_leaf_item(struct btrfs_root *root,
@@ -238,6 +374,11 @@ static int check_leaf_item(struct btrfs_root *root,
case BTRFS_EXTENT_CSUM_KEY:
ret = check_csum_item(root, leaf, key, slot);
break;
+ case BTRFS_DIR_ITEM_KEY:
+ case BTRFS_DIR_INDEX_KEY:
+ case BTRFS_XATTR_ITEM_KEY:
+ ret = check_dir_item(root, leaf, key, slot);
+ break;
}
return ret;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 7bf9b31561db..ee1aaed1330e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1173,19 +1173,15 @@ next:
return 0;
}
-static int extref_get_fields(struct extent_buffer *eb, int slot,
- unsigned long ref_ptr, u32 *namelen, char **name,
- u64 *index, u64 *parent_objectid)
+static int extref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index,
+ u64 *parent_objectid)
{
struct btrfs_inode_extref *extref;
extref = (struct btrfs_inode_extref *)ref_ptr;
*namelen = btrfs_inode_extref_name_len(eb, extref);
- if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)&extref->name,
- *namelen))
- return -EIO;
-
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@@ -1200,19 +1196,14 @@ static int extref_get_fields(struct extent_buffer *eb, int slot,
return 0;
}
-static int ref_get_fields(struct extent_buffer *eb, int slot,
- unsigned long ref_ptr, u32 *namelen, char **name,
- u64 *index)
+static int ref_get_fields(struct extent_buffer *eb, unsigned long ref_ptr,
+ u32 *namelen, char **name, u64 *index)
{
struct btrfs_inode_ref *ref;
ref = (struct btrfs_inode_ref *)ref_ptr;
*namelen = btrfs_inode_ref_name_len(eb, ref);
- if (!btrfs_is_name_len_valid(eb, slot, (unsigned long)(ref + 1),
- *namelen))
- return -EIO;
-
*name = kmalloc(*namelen, GFP_NOFS);
if (*name == NULL)
return -ENOMEM;
@@ -1287,8 +1278,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
while (ref_ptr < ref_end) {
if (log_ref_ver) {
- ret = extref_get_fields(eb, slot, ref_ptr, &namelen,
- &name, &ref_index, &parent_objectid);
+ ret = extref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index, &parent_objectid);
/*
* parent object can change from one array
* item to another.
@@ -1300,8 +1291,8 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
goto out;
}
} else {
- ret = ref_get_fields(eb, slot, ref_ptr, &namelen,
- &name, &ref_index);
+ ret = ref_get_fields(eb, ref_ptr, &namelen, &name,
+ &ref_index);
}
if (ret)
goto out;
@@ -1835,7 +1826,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, int slot,
struct btrfs_key *key)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
u32 item_size = btrfs_item_size_nr(eb, slot);
struct btrfs_dir_item *di;
@@ -1848,8 +1838,6 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(fs_info, eb, slot, di))
- return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
if (ret < 0)
@@ -2024,11 +2012,6 @@ again:
ptr_end = ptr + item_size;
while (ptr < ptr_end) {
di = (struct btrfs_dir_item *)ptr;
- if (verify_dir_item(fs_info, eb, slot, di)) {
- ret = -EIO;
- goto out;
- }
-
name_len = btrfs_dir_name_len(eb, di);
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
@@ -2109,7 +2092,6 @@ static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
const u64 ino)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key search_key;
struct btrfs_path *log_path;
int i;
@@ -2151,11 +2133,6 @@ process_leaf:
u32 this_len = sizeof(*di) + name_len + data_len;
char *name;
- ret = verify_dir_item(fs_info, path->nodes[0], i, di);
- if (ret) {
- ret = -EIO;
- goto out;
- }
name = kmalloc(name_len, GFP_NOFS);
if (!name) {
ret = -ENOMEM;
@@ -4572,12 +4549,6 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
this_len = sizeof(*extref) + this_name_len;
}
- ret = btrfs_is_name_len_valid(eb, slot, name_ptr,
- this_name_len);
- if (!ret) {
- ret = -EIO;
- goto out;
- }
if (this_name_len > name_len) {
char *new_name;
@@ -5432,11 +5403,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct dentry *parent,
const loff_t start,
const loff_t end,
- int exists_only,
+ int inode_only,
struct btrfs_log_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
- int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
struct dentry *old_parent = NULL;
int ret = 0;
@@ -5602,7 +5572,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
int ret;
ret = btrfs_log_inode_parent(trans, root, BTRFS_I(d_inode(dentry)),
- parent, start, end, 0, ctx);
+ parent, start, end, LOG_INODE_ALL, ctx);
dput(parent);
return ret;
@@ -5865,6 +5835,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
return 0;
return btrfs_log_inode_parent(trans, root, inode, parent, 0,
- LLONG_MAX, 1, NULL);
+ LLONG_MAX, LOG_INODE_EXISTS, NULL);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49810b70afd3..a999848e745f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -145,6 +145,71 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
struct btrfs_bio **bbio_ret,
int mirror_num, int need_raid_map);
+/*
+ * Device locking
+ * ==============
+ *
+ * There are several mutexes that protect manipulation of devices and low-level
+ * structures like chunks but not block groups, extents or files
+ *
+ * uuid_mutex (global lock)
+ * ------------------------
+ * protects the fs_uuids list that tracks all per-fs fs_devices, resulting from
+ * the SCAN_DEV ioctl registration or from mount either implicitly (the first
+ * device) or requested by the device= mount option
+ *
+ * the mutex can be very coarse and can cover long-running operations
+ *
+ * protects: updates to fs_devices counters like missing devices, rw devices,
+ * seeding, structure cloning, openning/closing devices at mount/umount time
+ *
+ * global::fs_devs - add, remove, updates to the global list
+ *
+ * does not protect: manipulation of the fs_devices::devices list!
+ *
+ * btrfs_device::name - renames (write side), read is RCU
+ *
+ * fs_devices::device_list_mutex (per-fs, with RCU)
+ * ------------------------------------------------
+ * protects updates to fs_devices::devices, ie. adding and deleting
+ *
+ * simple list traversal with read-only actions can be done with RCU protection
+ *
+ * may be used to exclude some operations from running concurrently without any
+ * modifications to the list (see write_all_supers)
+ *
+ * volume_mutex
+ * ------------
+ * coarse lock owned by a mounted filesystem; used to exclude some operations
+ * that cannot run in parallel and affect the higher-level properties of the
+ * filesystem like: device add/deleting/resize/replace, or balance
+ *
+ * balance_mutex
+ * -------------
+ * protects balance structures (status, state) and context accessed from
+ * several places (internally, ioctl)
+ *
+ * chunk_mutex
+ * -----------
+ * protects chunks, adding or removing during allocation, trim or when a new
+ * device is added/removed
+ *
+ * cleaner_mutex
+ * -------------
+ * a big lock that is held by the cleaner thread and prevents running subvolume
+ * cleaning together with relocation or delayed iputs
+ *
+ *
+ * Lock nesting
+ * ============
+ *
+ * uuid_mutex
+ * volume_mutex
+ * device_list_mutex
+ * chunk_mutex
+ * balance_mutex
+ */
+
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
struct list_head *btrfs_get_fs_uuids(void)
@@ -180,6 +245,13 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
return fs_devs;
}
+static void free_device(struct btrfs_device *device)
+{
+ rcu_string_free(device->name);
+ bio_put(device->flush_bio);
+ kfree(device);
+}
+
static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
struct btrfs_device *device;
@@ -188,9 +260,7 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
device = list_entry(fs_devices->devices.next,
struct btrfs_device, dev_list);
list_del(&device->dev_list);
- rcu_string_free(device->name);
- bio_put(device->flush_bio);
- kfree(device);
+ free_device(device);
}
kfree(fs_devices);
}
@@ -220,6 +290,11 @@ void btrfs_cleanup_fs_uuids(void)
}
}
+/*
+ * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
+ * Returned struct is not linked onto any lists and must be destroyed using
+ * free_device.
+ */
static struct btrfs_device *__alloc_device(void)
{
struct btrfs_device *dev;
@@ -578,15 +653,77 @@ static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
} else {
fs_devs->num_devices--;
list_del(&dev->dev_list);
- rcu_string_free(dev->name);
- bio_put(dev->flush_bio);
- kfree(dev);
+ free_device(dev);
}
break;
}
}
}
+static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
+ struct btrfs_device *device, fmode_t flags,
+ void *holder)
+{
+ struct request_queue *q;
+ struct block_device *bdev;
+ struct buffer_head *bh;
+ struct btrfs_super_block *disk_super;
+ u64 devid;
+ int ret;
+
+ if (device->bdev)
+ return -EINVAL;
+ if (!device->name)
+ return -EINVAL;
+
+ ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+ &bdev, &bh);
+ if (ret)
+ return ret;
+
+ disk_super = (struct btrfs_super_block *)bh->b_data;
+ devid = btrfs_stack_device_id(&disk_super->dev_item);
+ if (devid != device->devid)
+ goto error_brelse;
+
+ if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
+ goto error_brelse;
+
+ device->generation = btrfs_super_generation(disk_super);
+
+ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
+ device->writeable = 0;
+ fs_devices->seeding = 1;
+ } else {
+ device->writeable = !bdev_read_only(bdev);
+ }
+
+ q = bdev_get_queue(bdev);
+ if (blk_queue_discard(q))
+ device->can_discard = 1;
+ if (!blk_queue_nonrot(q))
+ fs_devices->rotating = 1;
+
+ device->bdev = bdev;
+ device->in_fs_metadata = 0;
+ device->mode = flags;
+
+ fs_devices->open_devices++;
+ if (device->writeable && device->devid != BTRFS_DEV_REPLACE_DEVID) {
+ fs_devices->rw_devices++;
+ list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+ }
+ brelse(bh);
+
+ return 0;
+
+error_brelse:
+ brelse(bh);
+ blkdev_put(bdev, flags);
+
+ return -EINVAL;
+}
+
/*
* Add new device to list of registered devices
*
@@ -632,8 +769,7 @@ static noinline int device_list_add(const char *path,
name = rcu_string_strdup(path, GFP_NOFS);
if (!name) {
- bio_put(device->flush_bio);
- kfree(device);
+ free_device(device);
return -ENOMEM;
}
rcu_assign_pointer(device->name, name);
@@ -745,8 +881,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
name = rcu_string_strdup(orig_dev->name->str,
GFP_KERNEL);
if (!name) {
- bio_put(device->flush_bio);
- kfree(device);
+ free_device(device);
goto error;
}
rcu_assign_pointer(device->name, name);
@@ -810,9 +945,7 @@ again:
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
- rcu_string_free(device->name);
- bio_put(device->flush_bio);
- kfree(device);
+ free_device(device);
}
if (fs_devices->seed) {
@@ -825,35 +958,25 @@ again:
mutex_unlock(&uuid_mutex);
}
-static void __free_device(struct work_struct *work)
-{
- struct btrfs_device *device;
-
- device = container_of(work, struct btrfs_device, rcu_work);
- rcu_string_free(device->name);
- bio_put(device->flush_bio);
- kfree(device);
-}
-
-static void free_device(struct rcu_head *head)
+static void free_device_rcu(struct rcu_head *head)
{
struct btrfs_device *device;
device = container_of(head, struct btrfs_device, rcu);
-
- INIT_WORK(&device->rcu_work, __free_device);
- schedule_work(&device->rcu_work);
+ free_device(device);
}
static void btrfs_close_bdev(struct btrfs_device *device)
{
- if (device->bdev && device->writeable) {
+ if (!device->bdev)
+ return;
+
+ if (device->writeable) {
sync_blockdev(device->bdev);
invalidate_bdev(device->bdev);
}
- if (device->bdev)
- blkdev_put(device->bdev, device->mode);
+ blkdev_put(device->bdev, device->mode);
}
static void btrfs_prepare_close_one_device(struct btrfs_device *device)
@@ -917,7 +1040,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
struct btrfs_device, dev_list);
list_del(&device->dev_list);
btrfs_close_bdev(device);
- call_rcu(&device->rcu, free_device);
+ call_rcu(&device->rcu, free_device_rcu);
}
WARN_ON(fs_devices->open_devices);
@@ -947,93 +1070,33 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
__btrfs_close_devices(fs_devices);
free_fs_devices(fs_devices);
}
- /*
- * Wait for rcu kworkers under __btrfs_close_devices
- * to finish all blkdev_puts so device is really
- * free when umount is done.
- */
- rcu_barrier();
return ret;
}
static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fmode_t flags, void *holder)
{
- struct request_queue *q;
- struct block_device *bdev;
struct list_head *head = &fs_devices->devices;
struct btrfs_device *device;
struct btrfs_device *latest_dev = NULL;
- struct buffer_head *bh;
- struct btrfs_super_block *disk_super;
- u64 devid;
- int seeding = 1;
int ret = 0;
flags |= FMODE_EXCL;
list_for_each_entry(device, head, dev_list) {
- if (device->bdev)
- continue;
- if (!device->name)
- continue;
-
/* Just open everything we can; ignore failures here */
- if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
- &bdev, &bh))
+ ret = btrfs_open_one_device(fs_devices, device, flags, holder);
+ if (ret)
continue;
- disk_super = (struct btrfs_super_block *)bh->b_data;
- devid = btrfs_stack_device_id(&disk_super->dev_item);
- if (devid != device->devid)
- goto error_brelse;
-
- if (memcmp(device->uuid, disk_super->dev_item.uuid,
- BTRFS_UUID_SIZE))
- goto error_brelse;
-
- device->generation = btrfs_super_generation(disk_super);
if (!latest_dev ||
device->generation > latest_dev->generation)
latest_dev = device;
-
- if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
- device->writeable = 0;
- } else {
- device->writeable = !bdev_read_only(bdev);
- seeding = 0;
- }
-
- q = bdev_get_queue(bdev);
- if (blk_queue_discard(q))
- device->can_discard = 1;
- if (!blk_queue_nonrot(q))
- fs_devices->rotating = 1;
-
- device->bdev = bdev;
- device->in_fs_metadata = 0;
- device->mode = flags;
-
- fs_devices->open_devices++;
- if (device->writeable &&
- device->devid != BTRFS_DEV_REPLACE_DEVID) {
- fs_devices->rw_devices++;
- list_add(&device->dev_alloc_list,
- &fs_devices->alloc_list);
- }
- brelse(bh);
- continue;
-
-error_brelse:
- brelse(bh);
- blkdev_put(bdev, flags);
- continue;
}
if (fs_devices->open_devices == 0) {
ret = -EINVAL;
goto out;
}
- fs_devices->seeding = seeding;
fs_devices->opened = 1;
fs_devices->latest_bdev = latest_dev->bdev;
fs_devices->total_rw_bytes = 0;
@@ -1662,7 +1725,7 @@ error:
* the device information is stored in the chunk root
* the btrfs_device struct should be fully filled in
*/
-static int btrfs_add_device(struct btrfs_trans_handle *trans,
+static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_device *device)
{
@@ -1859,6 +1922,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
u64 num_devices;
int ret = 0;
+ mutex_lock(&fs_info->volume_mutex);
mutex_lock(&uuid_mutex);
num_devices = fs_info->fs_devices->num_devices;
@@ -1954,7 +2018,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
btrfs_scratch_superblocks(device->bdev, device->name->str);
btrfs_close_bdev(device);
- call_rcu(&device->rcu, free_device);
+ call_rcu(&device->rcu, free_device_rcu);
if (cur_devices->open_devices == 0) {
struct btrfs_fs_devices *fs_devices;
@@ -1973,6 +2037,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
out:
mutex_unlock(&uuid_mutex);
+ mutex_unlock(&fs_info->volume_mutex);
return ret;
error_undo:
@@ -2025,7 +2090,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
}
btrfs_close_bdev(srcdev);
- call_rcu(&srcdev->rcu, free_device);
+ call_rcu(&srcdev->rcu, free_device_rcu);
/* if this is no devs we rather delete the fs_devices */
if (!fs_devices->num_devices) {
@@ -2084,7 +2149,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
btrfs_close_bdev(tgtdev);
- call_rcu(&tgtdev->rcu, free_device);
+ call_rcu(&tgtdev->rcu, free_device_rcu);
}
static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
@@ -2358,20 +2423,15 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
- bio_put(device->flush_bio);
- kfree(device);
ret = -ENOMEM;
- goto error;
+ goto error_free_device;
}
rcu_assign_pointer(device->name, name);
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- rcu_string_free(device->name);
- bio_put(device->flush_bio);
- kfree(device);
ret = PTR_ERR(trans);
- goto error;
+ goto error_free_device;
}
q = bdev_get_queue(bdev);
@@ -2450,7 +2510,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
}
}
- ret = btrfs_add_device(trans, fs_info, device);
+ ret = btrfs_add_dev_item(trans, fs_info, device);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto error_sysfs;
@@ -2511,9 +2571,8 @@ error_trans:
sb->s_flags |= SB_RDONLY;
if (trans)
btrfs_end_transaction(trans);
- rcu_string_free(device->name);
- bio_put(device->flush_bio);
- kfree(device);
+error_free_device:
+ free_device(device);
error:
blkdev_put(bdev, FMODE_EXCL);
if (seeding_dev && !unlocked) {
@@ -2579,8 +2638,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
- bio_put(device->flush_bio);
- kfree(device);
+ free_device(device);
ret = -ENOMEM;
goto error;
}
@@ -6004,15 +6062,14 @@ static void btrfs_end_bio(struct bio *bio)
dev = bbio->stripes[stripe_index].dev;
if (dev->bdev) {
if (bio_op(bio) == REQ_OP_WRITE)
- btrfs_dev_stat_inc(dev,
+ btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else
- btrfs_dev_stat_inc(dev,
+ btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
- btrfs_dev_stat_inc(dev,
+ btrfs_dev_stat_inc_and_print(dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
- btrfs_dev_stat_print_on_error(dev);
}
}
}
@@ -6273,8 +6330,8 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
* is generated.
*
* Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
- * on error. Returned struct is not linked onto any lists and can be
- * destroyed with kfree() right away.
+ * on error. Returned struct is not linked onto any lists and must be
+ * destroyed with free_device.
*/
struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid,
@@ -6297,8 +6354,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
ret = find_next_devid(fs_info, &tmp);
if (ret) {
- bio_put(dev->flush_bio);
- kfree(dev);
+ free_device(dev);
return ERR_PTR(ret);
}
}
@@ -7092,10 +7148,24 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) {
- if (!device->dev_stats_valid || !btrfs_dev_stats_dirty(device))
+ stats_cnt = atomic_read(&device->dev_stats_ccnt);
+ if (!device->dev_stats_valid || stats_cnt == 0)
continue;
- stats_cnt = atomic_read(&device->dev_stats_ccnt);
+
+ /*
+ * There is a LOAD-LOAD control dependency between the value of
+ * dev_stats_ccnt and updating the on-disk values which requires
+ * reading the in-memory counters. Such control dependencies
+ * require explicit read memory barriers.
+ *
+ * This memory barriers pairs with smp_mb__before_atomic in
+ * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
+ * barrier implied by atomic_xchg in
+ * btrfs_dev_stats_read_and_reset
+ */
+ smp_rmb();
+
ret = update_dev_stat_item(trans, fs_info, device);
if (!ret)
atomic_sub(stats_cnt, &device->dev_stats_ccnt);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ff15208344a7..294c4eb6a272 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -133,7 +133,6 @@ struct btrfs_device {
struct btrfs_work work;
struct rcu_head rcu;
- struct work_struct rcu_work;
/* readahead state */
spinlock_t reada_lock;
@@ -489,15 +488,16 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 chunk_offset);
-static inline int btrfs_dev_stats_dirty(struct btrfs_device *dev)
-{
- return atomic_read(&dev->dev_stats_ccnt);
-}
-
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
int index)
{
atomic_inc(dev->dev_stat_values + index);
+ /*
+ * This memory barrier orders stores updating statistics before stores
+ * updating dev_stats_ccnt.
+ *
+ * It pairs with smp_rmb() in btrfs_run_dev_stats().
+ */
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
@@ -514,7 +514,13 @@ static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
int ret;
ret = atomic_xchg(dev->dev_stat_values + index, 0);
- smp_mb__before_atomic();
+ /*
+ * atomic_xchg implies a full memory barriers as per atomic_t.txt:
+ * - RMW operations that have a return value are fully ordered;
+ *
+ * This implicit memory barriers is paired with the smp_rmb in
+ * btrfs_run_dev_stats
+ */
atomic_inc(&dev->dev_stats_ccnt);
return ret;
}
@@ -523,6 +529,12 @@ static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
int index, unsigned long val)
{
atomic_set(dev->dev_stat_values + index, val);
+ /*
+ * This memory barrier orders stores updating statistics before stores
+ * updating dev_stats_ccnt.
+ *
+ * It pairs with smp_rmb() in btrfs_run_dev_stats().
+ */
smp_mb__before_atomic();
atomic_inc(&dev->dev_stats_ccnt);
}
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 2c7e53f9ff1b..ad298c248da4 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -267,7 +267,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
struct btrfs_key key;
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
int ret = 0;
@@ -336,11 +335,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
u32 this_len = sizeof(*di) + name_len + data_len;
unsigned long name_ptr = (unsigned long)(di + 1);
- if (verify_dir_item(fs_info, leaf, slot, di)) {
- ret = -EIO;
- goto err;
- }
-
total_size += name_len + 1;
/*
* We are just looking for how big our buffer needs to
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 17f2dd8fddb8..eb3e39012658 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -26,11 +26,14 @@
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
+// Max supported by the algorithm is 22, but gains for small blocks (128KB)
+// are limited, thus we cap at 15.
+#define ZSTD_BTRFS_MAX_LEVEL 15
-static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
+static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len, int level)
{
- ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL,
- src_len, 0);
+ ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
+ BUG_ON(level > ZSTD_maxCLevel());
if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
@@ -43,6 +46,9 @@ struct workspace {
size_t size;
char *buf;
struct list_head list;
+ ZSTD_inBuffer in_buf;
+ ZSTD_outBuffer out_buf;
+ int level;
};
static void zstd_free_workspace(struct list_head *ws)
@@ -57,7 +63,8 @@ static void zstd_free_workspace(struct list_head *ws)
static struct list_head *zstd_alloc_workspace(void)
{
ZSTD_parameters params =
- zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
+ zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT,
+ ZSTD_BTRFS_MAX_LEVEL);
struct workspace *workspace;
workspace = kzalloc(sizeof(*workspace), GFP_KERNEL);
@@ -94,14 +101,12 @@ static int zstd_compress_pages(struct list_head *ws,
int nr_pages = 0;
struct page *in_page = NULL; /* The current page to read */
struct page *out_page = NULL; /* The current page to write to */
- ZSTD_inBuffer in_buf = { NULL, 0, 0 };
- ZSTD_outBuffer out_buf = { NULL, 0, 0 };
unsigned long tot_in = 0;
unsigned long tot_out = 0;
unsigned long len = *total_out;
const unsigned long nr_dest_pages = *out_pages;
unsigned long max_out = nr_dest_pages * PAGE_SIZE;
- ZSTD_parameters params = zstd_get_btrfs_parameters(len);
+ ZSTD_parameters params = zstd_get_btrfs_parameters(len, workspace->level);
*out_pages = 0;
*total_out = 0;
@@ -118,9 +123,9 @@ static int zstd_compress_pages(struct list_head *ws,
/* map in the first page of input data */
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- in_buf.src = kmap(in_page);
- in_buf.pos = 0;
- in_buf.size = min_t(size_t, len, PAGE_SIZE);
+ workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.pos = 0;
+ workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
/* Allocate and map in the output buffer */
@@ -130,14 +135,15 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
- out_buf.dst = kmap(out_page);
- out_buf.pos = 0;
- out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.pos = 0;
+ workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
while (1) {
size_t ret2;
- ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf);
+ ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
+ &workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@@ -146,22 +152,22 @@ static int zstd_compress_pages(struct list_head *ws,
}
/* Check to see if we are making it bigger */
- if (tot_in + in_buf.pos > 8192 &&
- tot_in + in_buf.pos <
- tot_out + out_buf.pos) {
+ if (tot_in + workspace->in_buf.pos > 8192 &&
+ tot_in + workspace->in_buf.pos <
+ tot_out + workspace->out_buf.pos) {
ret = -E2BIG;
goto out;
}
/* We've reached the end of our output range */
- if (out_buf.pos >= max_out) {
- tot_out += out_buf.pos;
+ if (workspace->out_buf.pos >= max_out) {
+ tot_out += workspace->out_buf.pos;
ret = -E2BIG;
goto out;
}
/* Check if we need more output space */
- if (out_buf.pos == out_buf.size) {
+ if (workspace->out_buf.pos == workspace->out_buf.size) {
tot_out += PAGE_SIZE;
max_out -= PAGE_SIZE;
kunmap(out_page);
@@ -176,19 +182,20 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
- out_buf.dst = kmap(out_page);
- out_buf.pos = 0;
- out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.pos = 0;
+ workspace->out_buf.size = min_t(size_t, max_out,
+ PAGE_SIZE);
}
/* We've reached the end of the input */
- if (in_buf.pos >= len) {
- tot_in += in_buf.pos;
+ if (workspace->in_buf.pos >= len) {
+ tot_in += workspace->in_buf.pos;
break;
}
/* Check if we need more input */
- if (in_buf.pos == in_buf.size) {
+ if (workspace->in_buf.pos == workspace->in_buf.size) {
tot_in += PAGE_SIZE;
kunmap(in_page);
put_page(in_page);
@@ -196,15 +203,15 @@ static int zstd_compress_pages(struct list_head *ws,
start += PAGE_SIZE;
len -= PAGE_SIZE;
in_page = find_get_page(mapping, start >> PAGE_SHIFT);
- in_buf.src = kmap(in_page);
- in_buf.pos = 0;
- in_buf.size = min_t(size_t, len, PAGE_SIZE);
+ workspace->in_buf.src = kmap(in_page);
+ workspace->in_buf.pos = 0;
+ workspace->in_buf.size = min_t(size_t, len, PAGE_SIZE);
}
}
while (1) {
size_t ret2;
- ret2 = ZSTD_endStream(stream, &out_buf);
+ ret2 = ZSTD_endStream(stream, &workspace->out_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_endStream returned %d\n",
ZSTD_getErrorCode(ret2));
@@ -212,11 +219,11 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
if (ret2 == 0) {
- tot_out += out_buf.pos;
+ tot_out += workspace->out_buf.pos;
break;
}
- if (out_buf.pos >= max_out) {
- tot_out += out_buf.pos;
+ if (workspace->out_buf.pos >= max_out) {
+ tot_out += workspace->out_buf.pos;
ret = -E2BIG;
goto out;
}
@@ -235,9 +242,9 @@ static int zstd_compress_pages(struct list_head *ws,
goto out;
}
pages[nr_pages++] = out_page;
- out_buf.dst = kmap(out_page);
- out_buf.pos = 0;
- out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+ workspace->out_buf.dst = kmap(out_page);
+ workspace->out_buf.pos = 0;
+ workspace->out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
}
if (tot_out >= tot_in) {
@@ -273,8 +280,6 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
unsigned long total_out = 0;
- ZSTD_inBuffer in_buf = { NULL, 0, 0 };
- ZSTD_outBuffer out_buf = { NULL, 0, 0 };
stream = ZSTD_initDStream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
@@ -284,18 +289,19 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
- in_buf.src = kmap(pages_in[page_in_index]);
- in_buf.pos = 0;
- in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.pos = 0;
+ workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
- out_buf.dst = workspace->buf;
- out_buf.pos = 0;
- out_buf.size = PAGE_SIZE;
+ workspace->out_buf.dst = workspace->buf;
+ workspace->out_buf.pos = 0;
+ workspace->out_buf.size = PAGE_SIZE;
while (1) {
size_t ret2;
- ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+ ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+ &workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@@ -303,38 +309,38 @@ static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
goto done;
}
buf_start = total_out;
- total_out += out_buf.pos;
- out_buf.pos = 0;
+ total_out += workspace->out_buf.pos;
+ workspace->out_buf.pos = 0;
- ret = btrfs_decompress_buf2page(out_buf.dst, buf_start,
- total_out, disk_start, orig_bio);
+ ret = btrfs_decompress_buf2page(workspace->out_buf.dst,
+ buf_start, total_out, disk_start, orig_bio);
if (ret == 0)
break;
- if (in_buf.pos >= srclen)
+ if (workspace->in_buf.pos >= srclen)
break;
/* Check if we've hit the end of a frame */
if (ret2 == 0)
break;
- if (in_buf.pos == in_buf.size) {
+ if (workspace->in_buf.pos == workspace->in_buf.size) {
kunmap(pages_in[page_in_index++]);
if (page_in_index >= total_pages_in) {
- in_buf.src = NULL;
+ workspace->in_buf.src = NULL;
ret = -EIO;
goto done;
}
srclen -= PAGE_SIZE;
- in_buf.src = kmap(pages_in[page_in_index]);
- in_buf.pos = 0;
- in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+ workspace->in_buf.src = kmap(pages_in[page_in_index]);
+ workspace->in_buf.pos = 0;
+ workspace->in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
}
}
ret = 0;
zero_fill_bio(orig_bio);
done:
- if (in_buf.src)
+ if (workspace->in_buf.src)
kunmap(pages_in[page_in_index]);
return ret;
}
@@ -348,8 +354,6 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
ZSTD_DStream *stream;
int ret = 0;
size_t ret2;
- ZSTD_inBuffer in_buf = { NULL, 0, 0 };
- ZSTD_outBuffer out_buf = { NULL, 0, 0 };
unsigned long total_out = 0;
unsigned long pg_offset = 0;
char *kaddr;
@@ -364,16 +368,17 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(size_t, destlen, PAGE_SIZE);
- in_buf.src = data_in;
- in_buf.pos = 0;
- in_buf.size = srclen;
+ workspace->in_buf.src = data_in;
+ workspace->in_buf.pos = 0;
+ workspace->in_buf.size = srclen;
- out_buf.dst = workspace->buf;
- out_buf.pos = 0;
- out_buf.size = PAGE_SIZE;
+ workspace->out_buf.dst = workspace->buf;
+ workspace->out_buf.pos = 0;
+ workspace->out_buf.size = PAGE_SIZE;
ret2 = 1;
- while (pg_offset < destlen && in_buf.pos < in_buf.size) {
+ while (pg_offset < destlen
+ && workspace->in_buf.pos < workspace->in_buf.size) {
unsigned long buf_start;
unsigned long buf_offset;
unsigned long bytes;
@@ -384,7 +389,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
ret = -EIO;
goto finish;
}
- ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+ ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+ &workspace->in_buf);
if (ZSTD_isError(ret2)) {
pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
ZSTD_getErrorCode(ret2));
@@ -393,8 +399,8 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
}
buf_start = total_out;
- total_out += out_buf.pos;
- out_buf.pos = 0;
+ total_out += workspace->out_buf.pos;
+ workspace->out_buf.pos = 0;
if (total_out <= start_byte)
continue;
@@ -405,10 +411,11 @@ static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
buf_offset = 0;
bytes = min_t(unsigned long, destlen - pg_offset,
- out_buf.size - buf_offset);
+ workspace->out_buf.size - buf_offset);
kaddr = kmap_atomic(dest_page);
- memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes);
+ memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
+ bytes);
kunmap_atomic(kaddr);
pg_offset += bytes;
@@ -425,6 +432,10 @@ finish:
static void zstd_set_level(struct list_head *ws, unsigned int type)
{
+ struct workspace *workspace = list_entry(ws, struct workspace, list);
+ unsigned level = (type & 0xF0) >> 4;
+
+ workspace->level = level > 0 ? level : ZSTD_BTRFS_DEFAULT_LEVEL;
}
const struct btrfs_compress_op btrfs_zstd_compress = {