From a9185b41a4f84971b930c519f0c63bd450c4810d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Mar 2010 09:21:37 +0100 Subject: pass writeback_control to ->write_inode This gives the filesystem more information about the writeback that is happening. Trond requested this for the NFS unstable write handling, and other filesystems might benefit from this too by beeing able to distinguish between the different callers in more detail. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f8969..c41db6d45ab6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3968,7 +3968,7 @@ err: return ret; } -int btrfs_write_inode(struct inode *inode, int wait) +int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; @@ -3977,7 +3977,7 @@ int btrfs_write_inode(struct inode *inode, int wait) if (root->fs_info->btree_inode == inode) return 0; - if (wait) { + if (wbc->sync_mode == WB_SYNC_ALL) { trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); ret = btrfs_commit_transaction(trans, root); -- cgit v1.2.3 From 73f73415caddbc01d9f10c03e0a677d5b3d11569 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 4 Dec 2009 17:38:27 +0000 Subject: Btrfs: change how we mount subvolumes This work is in preperation for being able to set a different root as the default mounting root. There is currently a problem with how we mount subvolumes. We cannot currently mount a subvolume of a subvolume, you can only mount subvolumes/snapshots of the default subvolume. So say you take a snapshot of the default subvolume and call it snap1, and then take a snapshot of snap1 and call it snap2, so now you have / /snap1 /snap1/snap2 as your available volumes. Currently you can only mount / and /snap1, you cannot mount /snap1/snap2. To fix this problem instead of passing subvolid= you must pass in subvolid=, where is the tree id that gets spit out via the subvolume listing you get from the subvolume listing patches (btrfs filesystem list). This allows us to mount /, /snap1 and /snap1/snap2 as the root volume. In addition to the above, we also now read the default dir item in the tree root to get the root key that it points to. For now this just points at what has always been the default subvolme, but later on I plan to change it to point at whatever root you want to be the new default root, so you can just set the default mount and not have to mount with -o subvolid=. I tested this out with the above scenario and it worked perfectly. Thanks, mount -o subvol operates inside the selected subvolid. For example: mount -o subvol=snap1,subvolid=256 /dev/xxx /mnt /mnt will have the snap1 directory for the subvolume with id 256. mount -o subvol=snap /dev/xxx /mnt /mnt will be the snap directory of whatever the default subvolume is. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/export.c | 4 +- fs/btrfs/inode.c | 10 +-- fs/btrfs/relocation.c | 2 +- fs/btrfs/super.c | 172 ++++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/tree-log.c | 2 +- 6 files changed, 158 insertions(+), 34 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index abbce4d90c1b..07d956977a07 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2335,7 +2335,7 @@ int btrfs_init_cachep(void); void btrfs_destroy_cachep(void); long btrfs_ioctl_trans_end(struct file *file); struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root); + struct btrfs_root *root, int *was_new); int btrfs_commit_write(struct file *file, struct page *page, unsigned from, unsigned to); struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index ba5c3fd5ab8c..951ef09b82f4 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -95,7 +95,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid, btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); key.offset = 0; - inode = btrfs_iget(sb, &key, root); + inode = btrfs_iget(sb, &key, root, NULL); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail; @@ -223,7 +223,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); + dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL)); if (!IS_ERR(dentry)) dentry->d_op = &btrfs_dentry_operations; return dentry; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4deb280f8969..7d10d1ccb0fe 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2153,7 +2153,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &found_key, root); + inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); if (IS_ERR(inode)) break; @@ -3687,7 +3687,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s, * Returns in *is_new if the inode was read from disk */ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, - struct btrfs_root *root) + struct btrfs_root *root, int *new) { struct inode *inode; @@ -3702,6 +3702,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, inode_tree_add(inode); unlock_new_inode(inode); + if (new) + *new = 1; } return inode; @@ -3754,7 +3756,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) return NULL; if (location.type == BTRFS_INODE_ITEM_KEY) { - inode = btrfs_iget(dir->i_sb, &location, root); + inode = btrfs_iget(dir->i_sb, &location, root, NULL); return inode; } @@ -3769,7 +3771,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) else inode = new_simple_dir(dir->i_sb, &location, sub_root); } else { - inode = btrfs_iget(dir->i_sb, &location, sub_root); + inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL); } srcu_read_unlock(&root->fs_info->subvol_srcu, index); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0109e5606bad..d52759daa53f 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3487,7 +3487,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root); + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index f8b4521de907..f878337cee6f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -63,10 +63,10 @@ static void btrfs_put_super(struct super_block *sb) } enum { - Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, - Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, - Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, - Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, + Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, + Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, + Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, + Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -74,6 +74,7 @@ enum { static match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, + {Opt_subvolid, "subvolid=%d"}, {Opt_device, "device=%s"}, {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, @@ -157,6 +158,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) btrfs_set_opt(info->mount_opt, DEGRADED); break; case Opt_subvol: + case Opt_subvolid: case Opt_device: /* * These are parsed by btrfs_parse_early_options @@ -292,12 +294,13 @@ out: * only when we need to allocate a new super block. */ static int btrfs_parse_early_options(const char *options, fmode_t flags, - void *holder, char **subvol_name, + void *holder, char **subvol_name, u64 *subvol_objectid, struct btrfs_fs_devices **fs_devices) { substring_t args[MAX_OPT_ARGS]; char *opts, *p; int error = 0; + int intarg; if (!options) goto out; @@ -320,6 +323,12 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, case Opt_subvol: *subvol_name = match_strdup(&args[0]); break; + case Opt_subvolid: + intarg = 0; + match_int(&args[0], &intarg); + if (intarg) + *subvol_objectid = intarg; + break; case Opt_device: error = btrfs_scan_one_device(match_strdup(&args[0]), flags, holder, fs_devices); @@ -347,6 +356,110 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags, return error; } +static struct dentry *get_default_root(struct super_block *sb, + u64 subvol_objectid) +{ + struct btrfs_root *root = sb->s_fs_info; + struct btrfs_root *new_root; + struct btrfs_dir_item *di; + struct btrfs_path *path; + struct btrfs_key location; + struct inode *inode; + struct dentry *dentry; + u64 dir_id; + int new = 0; + + /* + * We have a specific subvol we want to mount, just setup location and + * go look up the root. + */ + if (subvol_objectid) { + location.objectid = subvol_objectid; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + goto find_root; + } + + path = btrfs_alloc_path(); + if (!path) + return ERR_PTR(-ENOMEM); + path->leave_spinning = 1; + + /* + * Find the "default" dir item which points to the root item that we + * will mount by default if we haven't been given a specific subvolume + * to mount. + */ + dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); + if (!di) { + /* + * Ok the default dir item isn't there. This is weird since + * it's always been there, but don't freak out, just try and + * mount to root most subvolume. + */ + btrfs_free_path(path); + dir_id = BTRFS_FIRST_FREE_OBJECTID; + new_root = root->fs_info->fs_root; + goto setup_root; + } + + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + btrfs_free_path(path); + +find_root: + new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + if (IS_ERR(new_root)) + return ERR_PTR(PTR_ERR(new_root)); + + if (btrfs_root_refs(&new_root->root_item) == 0) + return ERR_PTR(-ENOENT); + + dir_id = btrfs_root_dirid(&new_root->root_item); +setup_root: + location.objectid = dir_id; + location.type = BTRFS_INODE_ITEM_KEY; + location.offset = 0; + + inode = btrfs_iget(sb, &location, new_root, &new); + if (!inode) + return ERR_PTR(-ENOMEM); + + /* + * If we're just mounting the root most subvol put the inode and return + * a reference to the dentry. We will have already gotten a reference + * to the inode in btrfs_fill_super so we're good to go. + */ + if (!new && sb->s_root->d_inode == inode) { + iput(inode); + return dget(sb->s_root); + } + + if (new) { + const struct qstr name = { .name = "/", .len = 1 }; + + /* + * New inode, we need to make the dentry a sibling of s_root so + * everything gets cleaned up properly on unmount. + */ + dentry = d_alloc(sb->s_root, &name); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + d_splice_alias(inode, dentry); + } else { + /* + * We found the inode in cache, just find a dentry for it and + * put the reference to the inode we just got. + */ + dentry = d_find_alias(inode); + iput(inode); + } + + return dentry; +} + static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices, void *data, int silent) @@ -380,7 +493,7 @@ static int btrfs_fill_super(struct super_block *sb, key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); + inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto fail_close; @@ -392,12 +505,6 @@ static int btrfs_fill_super(struct super_block *sb, err = -ENOMEM; goto fail_close; } -#if 0 - /* this does the super kobj at the same time */ - err = btrfs_sysfs_add_super(tree_root->fs_info); - if (err) - goto fail_close; -#endif sb->s_root = root_dentry; @@ -489,19 +596,22 @@ static int btrfs_test_super(struct super_block *s, void *data) static int btrfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - char *subvol_name = NULL; struct block_device *bdev = NULL; struct super_block *s; struct dentry *root; struct btrfs_fs_devices *fs_devices = NULL; fmode_t mode = FMODE_READ; + char *subvol_name = NULL; + u64 subvol_objectid = 0; int error = 0; + int found = 0; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; error = btrfs_parse_early_options(data, mode, fs_type, - &subvol_name, &fs_devices); + &subvol_name, &subvol_objectid, + &fs_devices); if (error) return error; @@ -530,6 +640,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, goto error_close_devices; } + found = 1; btrfs_close_devices(fs_devices); } else { char b[BDEVNAME_SIZE]; @@ -547,25 +658,35 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags, s->s_flags |= MS_ACTIVE; } - if (!strcmp(subvol_name, ".")) - root = dget(s->s_root); - else { - mutex_lock(&s->s_root->d_inode->i_mutex); - root = lookup_one_len(subvol_name, s->s_root, + root = get_default_root(s, subvol_objectid); + if (IS_ERR(root)) { + error = PTR_ERR(root); + deactivate_locked_super(s); + goto error; + } + /* if they gave us a subvolume name bind mount into that */ + if (strcmp(subvol_name, ".")) { + struct dentry *new_root; + mutex_lock(&root->d_inode->i_mutex); + new_root = lookup_one_len(subvol_name, root, strlen(subvol_name)); - mutex_unlock(&s->s_root->d_inode->i_mutex); + mutex_unlock(&root->d_inode->i_mutex); - if (IS_ERR(root)) { + if (IS_ERR(new_root)) { deactivate_locked_super(s); - error = PTR_ERR(root); - goto error_free_subvol_name; + error = PTR_ERR(new_root); + dput(root); + goto error_close_devices; } - if (!root->d_inode) { + if (!new_root->d_inode) { dput(root); + dput(new_root); deactivate_locked_super(s); error = -ENXIO; - goto error_free_subvol_name; + goto error_close_devices; } + dput(root); + root = new_root; } mnt->mnt_sb = s; @@ -580,6 +701,7 @@ error_close_devices: btrfs_close_devices(fs_devices); error_free_subvol_name: kfree(subvol_name); +error: return error; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4a9434b622ec..1255fcc8ade5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -445,7 +445,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root); + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); if (IS_ERR(inode)) { inode = NULL; } else if (is_bad_inode(inode)) { -- cgit v1.2.3 From 1e701a3292e25a6c4939cad9f24951dc6b6ad853 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Mar 2010 09:42:04 -0500 Subject: Btrfs: add new defrag-range ioctl. The btrfs defrag ioctl was limited to doing the entire file. This commit adds a new interface that can defrag a specific range inside the file. It can also force compression on the file, allowing you to selectively compress individual files after they were created, even when mount -o compress isn't turned on. Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 5 +++ fs/btrfs/ctree.h | 1 - fs/btrfs/inode.c | 11 +++++-- fs/btrfs/ioctl.c | 83 ++++++++++++++++++++++++++++++++++++++++++++------ fs/btrfs/ioctl.h | 31 +++++++++++++++++++ 5 files changed, 117 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 3f1f50d9d916..7a4dee199832 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -153,6 +153,11 @@ struct btrfs_inode { unsigned ordered_data_close:1; unsigned dummy_inode:1; + /* + * always compress this one file + */ + unsigned force_compress:1; + struct inode vfs_inode; }; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1166b15e9bf6..3a36b1fb553a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1184,7 +1184,6 @@ struct btrfs_root { #define BTRFS_INODE_NOATIME (1 << 9) #define BTRFS_INODE_DIRSYNC (1 << 10) - /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple * one for u8: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7d10d1ccb0fe..3657925c2461 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -379,7 +379,8 @@ again: * change at any time if we discover bad compression ratios. */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && - btrfs_test_opt(root, COMPRESS)) { + (btrfs_test_opt(root, COMPRESS) || + (BTRFS_I(inode)->force_compress))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); @@ -483,8 +484,10 @@ again: nr_pages_ret = 0; /* flag the file so we don't compress in the future */ - if (!btrfs_test_opt(root, FORCE_COMPRESS)) + if (!btrfs_test_opt(root, FORCE_COMPRESS) && + !(BTRFS_I(inode)->force_compress)) { BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; + } } if (will_compress) { *num_added += 1; @@ -1211,7 +1214,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - else if (!btrfs_test_opt(root, COMPRESS)) + else if (!btrfs_test_opt(root, COMPRESS) && + !(BTRFS_I(inode)->force_compress)) ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); else @@ -3639,6 +3643,7 @@ static noinline void init_btrfs_i(struct inode *inode) bi->index_cnt = (u64)-1; bi->last_unlink_trans = 0; bi->ordered_data_close = 0; + bi->force_compress = 0; extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); extent_io_tree_init(&BTRFS_I(inode)->io_tree, inode->i_mapping, GFP_NOFS); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3a89cd77f307..d866b460c26e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -476,13 +476,18 @@ out_unlock: } static int should_defrag_range(struct inode *inode, u64 start, u64 len, - u64 *last_len, u64 *skip, u64 *defrag_end) + int thresh, u64 *last_len, u64 *skip, + u64 *defrag_end) { struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 1; + + if (thresh == 0) + thresh = 256 * 1024; + /* * make sure that once we start defragging and extent, we keep on * defragging it @@ -517,8 +522,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, /* * we hit a real extent, if it is big don't bother defragging it again */ - if ((*last_len == 0 || *last_len >= 256 * 1024) && - em->len >= 256 * 1024) + if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) ret = 0; /* @@ -542,7 +546,8 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, return ret; } -static int btrfs_defrag_file(struct file *file) +static int btrfs_defrag_file(struct file *file, + struct btrfs_ioctl_defrag_range_args *range) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -563,11 +568,19 @@ static int btrfs_defrag_file(struct file *file) if (inode->i_size == 0) return 0; - last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; - i = 0; + if (range->start + range->len > range->start) { + last_index = min_t(u64, inode->i_size - 1, + range->start + range->len - 1) >> PAGE_CACHE_SHIFT; + } else { + last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + } + + i = range->start >> PAGE_CACHE_SHIFT; while (i <= last_index) { if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &last_len, &skip, + PAGE_CACHE_SIZE, + range->extent_thresh, + &last_len, &skip, &defrag_end)) { unsigned long next; /* @@ -585,6 +598,8 @@ static int btrfs_defrag_file(struct file *file) } total_read++; mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = 1; ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) { @@ -673,6 +688,28 @@ loop_unlock: i++; } + if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) + filemap_flush(inode->i_mapping); + + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + /* the filemap_flush will queue IO into the worker threads, but + * we have to make sure the IO is actually started and that + * ordered extents get created before we return + */ + atomic_inc(&root->fs_info->async_submit_draining); + while (atomic_read(&root->fs_info->nr_async_submits) || + atomic_read(&root->fs_info->async_delalloc_pages)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->nr_async_submits) == 0 && + atomic_read(&root->fs_info->async_delalloc_pages) == 0)); + } + atomic_dec(&root->fs_info->async_submit_draining); + + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = 0; + mutex_unlock(&inode->i_mutex); + } + return 0; err_reservations: @@ -1284,10 +1321,11 @@ out: return err; } -static int btrfs_ioctl_defrag(struct file *file) +static int btrfs_ioctl_defrag(struct file *file, void __user *argp) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_defrag_range_args *range; int ret; ret = mnt_want_write(file->f_path.mnt); @@ -1308,7 +1346,30 @@ static int btrfs_ioctl_defrag(struct file *file) ret = -EINVAL; goto out; } - btrfs_defrag_file(file); + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out; + } + + if (argp) { + if (copy_from_user(range, argp, + sizeof(*range))) { + ret = -EFAULT; + kfree(range); + } + /* compression requires us to start the IO */ + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + range->flags |= BTRFS_DEFRAG_RANGE_START_IO; + range->extent_thresh = (u32)-1; + } + } else { + /* the rest are all set to zero by kzalloc */ + range->len = (u64)-1; + } + btrfs_defrag_file(file, range); + kfree(range); break; } out: @@ -1831,7 +1892,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: - return btrfs_ioctl_defrag(file); + return btrfs_ioctl_defrag(file, NULL); + case BTRFS_IOC_DEFRAG_RANGE: + return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, argp); case BTRFS_IOC_ADD_DEV: diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index f1923e0260e3..2d64a65842f3 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -99,6 +99,35 @@ struct btrfs_ioctl_clone_range_args { __u64 dest_offset; }; +/* flags for the defrag range ioctl */ +#define BTRFS_DEFRAG_RANGE_COMPRESS 1 +#define BTRFS_DEFRAG_RANGE_START_IO 2 + +struct btrfs_ioctl_defrag_range_args { + /* start of the defrag operation */ + __u64 start; + + /* number of bytes to defrag, use (u64)-1 to say all */ + __u64 len; + + /* + * flags for the operation, which can include turning + * on compression for this one defrag + */ + __u64 flags; + + /* + * any extent bigger than this will be considered + * already defragged. Use 0 to take the kernel default + * Use 1 to say every single extent must be rewritten + */ + __u32 extent_thresh; + + /* spare for later */ + __u32 unused[5]; +}; + + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -130,6 +159,8 @@ struct btrfs_ioctl_clone_range_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ struct btrfs_ioctl_vol_args) +#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \ + struct btrfs_ioctl_defrag_range_args) #define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \ struct btrfs_ioctl_search_args) #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \ -- cgit v1.2.3 From 0be2e98173f8badd5ccc7c2e994891746ba1caf4 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 11 Feb 2010 08:06:58 +0000 Subject: btrfs: fix btrfs_mkdir goto for no free objectids btrfs_mkdir() must jump to the place of ending transaction after btrfs_find_free_objectid() failed. Or this transaction can't end. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3657925c2461..50ce8840a99b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4508,7 +4508,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); if (err) { err = -ENOSPC; - goto out_unlock; + goto out_fail; } inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, -- cgit v1.2.3 From 5a1a3df1f6c86926cfe8657e6f9b4b4c2f467d60 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 2 Feb 2010 20:51:14 +0000 Subject: Btrfs: cache ordered extent when completing io When finishing io we run btrfs_dec_test_ordered_pending, and then immediately run btrfs_lookup_ordered_extent, but btrfs_dec_test_ordered_pending does that already, so we're searching twice when we don't have to. This patch lets us pass a btrfs_ordered_extent in to btrfs_dec_test_ordered_pending so if we do complete io on that ordered extent we can just use the one we found then instead of having to do another btrfs_lookup_ordered_extent. This made my fio job with the other patch go from 24 mb/s to 29 mb/s. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 ++--- fs/btrfs/ordered-data.c | 7 ++++++- fs/btrfs/ordered-data.h | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50ce8840a99b..1824dda1d351 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1698,11 +1698,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) int compressed = 0; int ret; - ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); + ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, + end - start + 1); if (!ret) return 0; - - ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index d56f732ba95e..a8ffecd0b491 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -232,11 +232,12 @@ int btrfs_add_ordered_sum(struct inode *inode, * to make sure this function only returns 1 once for a given ordered extent. */ int btrfs_dec_test_ordered_pending(struct inode *inode, + struct btrfs_ordered_extent **cached, u64 file_offset, u64 io_size) { struct btrfs_ordered_inode_tree *tree; struct rb_node *node; - struct btrfs_ordered_extent *entry; + struct btrfs_ordered_extent *entry = NULL; int ret; tree = &BTRFS_I(inode)->ordered_tree; @@ -264,6 +265,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, else ret = 1; out: + if (!ret && cached && entry) { + *cached = entry; + atomic_inc(&entry->refs); + } spin_unlock(&tree->lock); return ret == 0; } diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index bfbcebbb0adc..c82f76a9f040 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -137,7 +137,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); int btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry); int btrfs_dec_test_ordered_pending(struct inode *inode, - u64 file_offset, u64 io_size); + struct btrfs_ordered_extent **cached, + u64 file_offset, u64 io_size); int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, u64 start, u64 len, u64 disk_len, int tyep); int btrfs_add_ordered_sum(struct inode *inode, -- cgit v1.2.3 From 2ac55d41b5d6bf49e76bc85db5431240617e2f8f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 3 Feb 2010 19:33:23 +0000 Subject: Btrfs: cache the extent state everywhere we possibly can V2 This patch just goes through and fixes everybody that does lock_extent() blah unlock_extent() to use lock_extent_bits() blah unlock_extent_cached() and pass around a extent_state so we only have to do the searches once per function. This gives me about a 3 mb/s boots on my random write test. I have not converted some things, like the relocation and ioctl's, since they aren't heavily used and the relocation stuff is in the middle of being re-written. I also changed the clear_extent_bit() to only unset the cached state if we are clearing EXTENT_LOCKED and related stuff, so we can do things like this lock_extent_bits() clear delalloc bits unlock_extent_cached() without losing our cached state. I tested this thoroughly and turned on LEAK_DEBUG to make sure we weren't leaking extent states, everything worked out fine. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 +- fs/btrfs/disk-io.c | 15 ++++--- fs/btrfs/extent-tree.c | 11 +++-- fs/btrfs/extent_io.c | 61 +++++++++++++++++---------- fs/btrfs/extent_io.h | 10 +++-- fs/btrfs/file.c | 23 ++++++---- fs/btrfs/inode.c | 111 ++++++++++++++++++++++++++++++------------------- fs/btrfs/ioctl.c | 2 +- fs/btrfs/relocation.c | 2 +- 9 files changed, 148 insertions(+), 90 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3a36b1fb553a..3f704a816137 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2311,7 +2311,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, u32 min_type); int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + struct extent_state **cached_state); int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0427183e3e05..11d0ad30e203 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -263,13 +263,15 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, static int verify_parent_transid(struct extent_io_tree *io_tree, struct extent_buffer *eb, u64 parent_transid) { + struct extent_state *cached_state = NULL; int ret; if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 0; - lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); - if (extent_buffer_uptodate(io_tree, eb) && + lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, + 0, &cached_state, GFP_NOFS); + if (extent_buffer_uptodate(io_tree, eb, cached_state) && btrfs_header_generation(eb) == parent_transid) { ret = 0; goto out; @@ -282,10 +284,10 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, (unsigned long long)btrfs_header_generation(eb)); } ret = 1; - clear_extent_buffer_uptodate(io_tree, eb); + clear_extent_buffer_uptodate(io_tree, eb, &cached_state); out: - unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); + unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, + &cached_state, GFP_NOFS); return ret; } @@ -2497,7 +2499,8 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) int ret; struct inode *btree_inode = buf->first_page->mapping->host; - ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); + ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, + NULL); if (!ret) return ret; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 559f72489b3b..1727b26fb194 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6561,6 +6561,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root, struct btrfs_key key; struct inode *inode = NULL; struct btrfs_file_extent_item *fi; + struct extent_state *cached_state = NULL; u64 num_bytes; u64 skip_objectid = 0; u32 nritems; @@ -6589,12 +6590,14 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root, } num_bytes = btrfs_file_extent_num_bytes(leaf, fi); - lock_extent(&BTRFS_I(inode)->io_tree, key.offset, - key.offset + num_bytes - 1, GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, 0, &cached_state, + GFP_NOFS); btrfs_drop_extent_cache(inode, key.offset, key.offset + num_bytes - 1, 1); - unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, - key.offset + num_bytes - 1, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset, + key.offset + num_bytes - 1, &cached_state, + GFP_NOFS); cond_resched(); } iput(inode); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3c17c9eb0d98..c99121ac5d6b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -513,7 +513,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u64 last_end; int err; int set = 0; + int clear = 0; + if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) + clear = 1; again: if (!prealloc && (mask & __GFP_WAIT)) { prealloc = alloc_extent_state(mask); @@ -524,14 +527,20 @@ again: spin_lock(&tree->lock); if (cached_state) { cached = *cached_state; - *cached_state = NULL; - cached_state = NULL; + + if (clear) { + *cached_state = NULL; + cached_state = NULL; + } + if (cached && cached->tree && cached->start == start) { - atomic_dec(&cached->refs); + if (clear) + atomic_dec(&cached->refs); state = cached; goto hit_next; } - free_extent_state(cached); + if (clear) + free_extent_state(cached); } /* * this search will find the extents that end after @@ -946,11 +955,11 @@ int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, } int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) + struct extent_state **cached_state, gfp_t mask) { return set_extent_bit(tree, start, end, EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, - 0, NULL, NULL, mask); + 0, NULL, cached_state, mask); } int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, @@ -984,10 +993,11 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, } static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, - u64 end, gfp_t mask) + u64 end, struct extent_state **cached_state, + gfp_t mask) { return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, - NULL, mask); + cached_state, mask); } int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) @@ -1727,7 +1737,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) } if (!uptodate) { - clear_extent_uptodate(tree, start, end, GFP_NOFS); + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); ClearPageUptodate(page); SetPageError(page); } @@ -2710,6 +2720,7 @@ int extent_readpages(struct extent_io_tree *tree, int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset) { + struct extent_state *cached_state = NULL; u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); u64 end = start + PAGE_CACHE_SIZE - 1; size_t blocksize = page->mapping->host->i_sb->s_blocksize; @@ -2718,12 +2729,12 @@ int extent_invalidatepage(struct extent_io_tree *tree, if (start > end) return 0; - lock_extent(tree, start, end, GFP_NOFS); + lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS); wait_on_page_writeback(page); clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, - 1, 1, NULL, GFP_NOFS); + 1, 1, &cached_state, GFP_NOFS); return 0; } @@ -2926,16 +2937,17 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock, get_extent_t *get_extent) { struct inode *inode = mapping->host; + struct extent_state *cached_state = NULL; u64 start = iblock << inode->i_blkbits; sector_t sector = 0; size_t blksize = (1 << inode->i_blkbits); struct extent_map *em; - lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, - GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, + 0, &cached_state, GFP_NOFS); em = get_extent(inode, NULL, 0, start, blksize, 0); - unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, - GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, + start + blksize - 1, &cached_state, GFP_NOFS); if (!em || IS_ERR(em)) return 0; @@ -2957,6 +2969,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u32 flags = 0; u64 disko = 0; struct extent_map *em = NULL; + struct extent_state *cached_state = NULL; int end = 0; u64 em_start = 0, em_len = 0; unsigned long emflags; @@ -2965,8 +2978,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (len == 0) return -EINVAL; - lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0, + &cached_state, GFP_NOFS); em = get_extent(inode, NULL, 0, off, max - off, 0); if (!em) goto out; @@ -3029,8 +3042,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, out_free: free_extent_map(em); out: - unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, - GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len, + &cached_state, GFP_NOFS); return ret; } @@ -3270,7 +3283,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, } int clear_extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb) + struct extent_buffer *eb, + struct extent_state **cached_state) { unsigned long i; struct page *page; @@ -3280,7 +3294,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - GFP_NOFS); + cached_state, GFP_NOFS); for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) @@ -3340,7 +3354,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, } int extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb) + struct extent_buffer *eb, + struct extent_state *cached_state) { int ret = 0; unsigned long num_pages; @@ -3352,7 +3367,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1, NULL); + EXTENT_UPTODATE, 1, cached_state); if (ret) return ret; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 36de250a7b2b..bbab4813646f 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -163,6 +163,8 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits, struct extent_state **cached, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); +int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, + struct extent_state **cached, gfp_t mask); int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, @@ -196,7 +198,7 @@ int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); + struct extent_state **cached_state, gfp_t mask); int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, @@ -281,9 +283,11 @@ int test_extent_buffer_dirty(struct extent_io_tree *tree, int set_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb); + struct extent_buffer *eb, + struct extent_state **cached_state); int extent_buffer_uptodate(struct extent_io_tree *tree, - struct extent_buffer *eb); + struct extent_buffer *eb, + struct extent_state *cached_state); int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **token, char **map, unsigned long *map_start, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a7fd9f3a750a..d146dde7efb6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -123,7 +123,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, root->sectorsize - 1) & ~((u64)root->sectorsize - 1); end_of_last_block = start_pos + num_bytes - 1; - err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); + err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, + NULL); if (err) return err; @@ -753,6 +754,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, loff_t pos, unsigned long first_index, unsigned long last_index, size_t write_bytes) { + struct extent_state *cached_state = NULL; int i; unsigned long index = pos >> PAGE_CACHE_SHIFT; struct inode *inode = fdentry(file)->d_inode; @@ -781,16 +783,18 @@ again: } if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; - lock_extent(&BTRFS_I(inode)->io_tree, - start_pos, last_pos - 1, GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, 0, &cached_state, + GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, last_pos - 1); if (ordered && ordered->file_offset + ordered->len > start_pos && ordered->file_offset < last_pos) { btrfs_put_ordered_extent(ordered); - unlock_extent(&BTRFS_I(inode)->io_tree, - start_pos, last_pos - 1, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, + &cached_state, GFP_NOFS); for (i = 0; i < num_pages; i++) { unlock_page(pages[i]); page_cache_release(pages[i]); @@ -802,12 +806,13 @@ again: if (ordered) btrfs_put_ordered_extent(ordered); - clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, + clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos, last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, + EXTENT_DO_ACCOUNTING, 0, 0, &cached_state, GFP_NOFS); - unlock_extent(&BTRFS_I(inode)->io_tree, - start_pos, last_pos - 1, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + start_pos, last_pos - 1, &cached_state, + GFP_NOFS); } for (i = 0; i < num_pages; i++) { clear_page_dirty_for_io(pages[i]); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1824dda1d351..2a337a09c650 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -573,8 +573,8 @@ retry: unsigned long nr_written = 0; lock_extent(io_tree, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, GFP_NOFS); + async_extent->start + + async_extent->ram_size - 1, GFP_NOFS); /* allocate blocks */ ret = cow_file_range(inode, async_cow->locked_page, @@ -1512,12 +1512,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, return 0; } -int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) +int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, + struct extent_state **cached_state) { if ((end & (PAGE_CACHE_SIZE - 1)) == 0) WARN_ON(1); return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, - GFP_NOFS); + cached_state, GFP_NOFS); } /* see btrfs_writepage_start_hook for details on why this is required */ @@ -1530,6 +1531,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; struct page *page; struct inode *inode; u64 page_start; @@ -1548,7 +1550,8 @@ again: page_start = page_offset(page); page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; - lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, + &cached_state, GFP_NOFS); /* already ordered? We're done */ if (PagePrivate2(page)) @@ -1556,17 +1559,18 @@ again: ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(&BTRFS_I(inode)->io_tree, page_start, - page_end, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, + page_end, &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); goto again; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); ClearPageChecked(page); out: - unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, + &cached_state, GFP_NOFS); out_page: unlock_page(page); page_cache_release(page); @@ -1695,6 +1699,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) struct btrfs_trans_handle *trans; struct btrfs_ordered_extent *ordered_extent = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_state *cached_state = NULL; int compressed = 0; int ret; @@ -1716,9 +1721,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) goto out; } - lock_extent(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - GFP_NOFS); + lock_extent_bits(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + ordered_extent->len - 1, + 0, &cached_state, GFP_NOFS); trans = btrfs_join_transaction(root, 1); @@ -1745,9 +1750,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->len); BUG_ON(ret); } - unlock_extent(io_tree, ordered_extent->file_offset, - ordered_extent->file_offset + ordered_extent->len - 1, - GFP_NOFS); + unlock_extent_cached(io_tree, ordered_extent->file_offset, + ordered_extent->file_offset + + ordered_extent->len - 1, &cached_state, GFP_NOFS); + add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); @@ -3084,6 +3090,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; char *kaddr; u32 blocksize = root->sectorsize; pgoff_t index = from >> PAGE_CACHE_SHIFT; @@ -3130,12 +3137,14 @@ again: } wait_on_page_writeback(page); - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); unlock_page(page); page_cache_release(page); btrfs_start_ordered_extent(inode, ordered, 1); @@ -3143,13 +3152,15 @@ again: goto again; } - clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, - GFP_NOFS); + 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state); if (ret) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); goto out_unlock; } @@ -3162,7 +3173,8 @@ again: } ClearPageChecked(page); set_page_dirty(page); - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, &cached_state, + GFP_NOFS); out_unlock: if (ret) @@ -3180,6 +3192,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em; + struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; u64 hole_start = (inode->i_size + mask) & ~mask; u64 block_end = (size + mask) & ~mask; @@ -3195,11 +3208,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, block_end - hole_start); - lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + lock_extent_bits(io_tree, hole_start, block_end - 1, 0, + &cached_state, GFP_NOFS); ordered = btrfs_lookup_ordered_extent(inode, hole_start); if (!ordered) break; - unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, + &cached_state, GFP_NOFS); btrfs_put_ordered_extent(ordered); } @@ -3244,7 +3259,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) break; } - unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); + unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, + GFP_NOFS); return err; } @@ -4985,6 +5001,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) { struct extent_io_tree *tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; u64 page_start = page_offset(page); u64 page_end = page_start + PAGE_CACHE_SIZE - 1; @@ -5003,7 +5020,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) btrfs_releasepage(page, GFP_NOFS); return; } - lock_extent(tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); ordered = btrfs_lookup_ordered_extent(page->mapping->host, page_offset(page)); if (ordered) { @@ -5014,7 +5032,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) clear_extent_bit(tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, - NULL, GFP_NOFS); + &cached_state, GFP_NOFS); /* * whoever cleared the private bit is responsible * for the finish_ordered_io @@ -5024,11 +5042,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) page_start, page_end); } btrfs_put_ordered_extent(ordered); - lock_extent(tree, page_start, page_end, GFP_NOFS); + cached_state = NULL; + lock_extent_bits(tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); } clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); + EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS); __btrfs_releasepage(page, GFP_NOFS); ClearPageChecked(page); @@ -5061,6 +5081,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_ordered_extent *ordered; + struct extent_state *cached_state = NULL; char *kaddr; unsigned long zero_start; loff_t size; @@ -5099,7 +5120,8 @@ again: } wait_on_page_writeback(page); - lock_extent(io_tree, page_start, page_end, GFP_NOFS); + lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state, + GFP_NOFS); set_page_extent_mapped(page); /* @@ -5108,7 +5130,8 @@ again: */ ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); unlock_page(page); btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); @@ -5122,13 +5145,15 @@ again: * is probably a better way to do this, but for now keep consistent with * prepare_pages in the normal write path. */ - clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, + clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, - GFP_NOFS); + 0, 0, &cached_state, GFP_NOFS); - ret = btrfs_set_extent_delalloc(inode, page_start, page_end); + ret = btrfs_set_extent_delalloc(inode, page_start, page_end, + &cached_state); if (ret) { - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, + &cached_state, GFP_NOFS); ret = VM_FAULT_SIGBUS; btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); goto out_unlock; @@ -5154,7 +5179,7 @@ again: BTRFS_I(inode)->last_trans = root->fs_info->generation; BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); out_unlock: btrfs_unreserve_metadata_for_delalloc(root, inode, 1); @@ -5833,6 +5858,7 @@ stop_trans: static long btrfs_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) { + struct extent_state *cached_state = NULL; u64 cur_offset; u64 last_byte; u64 alloc_start; @@ -5871,16 +5897,17 @@ static long btrfs_fallocate(struct inode *inode, int mode, /* the extent lock is ordered inside the running * transaction */ - lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - GFP_NOFS); + lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, + locked_end, 0, &cached_state, GFP_NOFS); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && ordered->file_offset + ordered->len > alloc_start && ordered->file_offset < alloc_end) { btrfs_put_ordered_extent(ordered); - unlock_extent(&BTRFS_I(inode)->io_tree, - alloc_start, locked_end, GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, + alloc_start, locked_end, + &cached_state, GFP_NOFS); /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -5922,8 +5949,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, break; } } - unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, - GFP_NOFS); + unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, + &cached_state, GFP_NOFS); btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, alloc_end - alloc_start); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d866b460c26e..9aaba6e472d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -673,7 +673,7 @@ again: page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, GFP_NOFS); - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d52759daa53f..0b23942cbc0d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2659,7 +2659,7 @@ static int relocate_file_extent_cluster(struct inode *inode, EXTENT_BOUNDARY, GFP_NOFS); nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); set_page_dirty(page); dirty_page++; -- cgit v1.2.3