From 298cfd368364912076fd84fbceb0df864da42a67 Mon Sep 17 00:00:00 2001 From: Chandan Rajendra Date: Thu, 21 Jan 2016 15:55:59 +0530 Subject: Btrfs: Use (eb->start, seq) as search key for tree modification log In subpagesize-blocksize a page can map multiple extent buffers and hence using (page index, seq) as the search key is incorrect. For example, searching through tree modification log tree can return an entry associated with the first extent buffer mapped by the page (if such an entry exists), when we are actually searching for entries associated with extent buffers that are mapped at position 2 or more in the page. Reviewed-by: Liu Bo Signed-off-by: Chandan Rajendra Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b4ce..f5ef7d171bb8 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -311,7 +311,7 @@ struct tree_mod_root { struct tree_mod_elem { struct rb_node node; - u64 index; /* shifted logical */ + u64 logical; u64 seq; enum mod_log_op op; @@ -435,11 +435,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, /* * key order of the log: - * index -> sequence + * node/leaf start address -> sequence * - * the index is the shifted logical of the *new* root node for root replace - * operations, or the shifted logical of the affected block for all other - * operations. + * The 'start address' is the logical address of the *new* root node + * for root replace operations, or the logical address of the affected + * block for all other operations. * * Note: must be called with write lock (tree_mod_log_write_lock). */ @@ -460,9 +460,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) while (*new) { cur = container_of(*new, struct tree_mod_elem, node); parent = *new; - if (cur->index < tm->index) + if (cur->logical < tm->logical) new = &((*new)->rb_left); - else if (cur->index > tm->index) + else if (cur->logical > tm->logical) new = &((*new)->rb_right); else if (cur->seq < tm->seq) new = &((*new)->rb_left); @@ -523,7 +523,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot, if (!tm) return NULL; - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; if (op != MOD_LOG_KEY_ADD) { btrfs_node_key(eb, &tm->key, slot); tm->blockptr = btrfs_node_blockptr(eb, slot); @@ -588,7 +588,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = eb->start >> PAGE_CACHE_SHIFT; + tm->logical = eb->start; tm->slot = src_slot; tm->move.dst_slot = dst_slot; tm->move.nr_items = nr_items; @@ -699,7 +699,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, goto free_tms; } - tm->index = new_root->start >> PAGE_CACHE_SHIFT; + tm->logical = new_root->start; tm->old_root.logical = old_root->start; tm->old_root.level = btrfs_header_level(old_root); tm->generation = btrfs_header_generation(old_root); @@ -739,16 +739,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct rb_node *node; struct tree_mod_elem *cur = NULL; struct tree_mod_elem *found = NULL; - u64 index = start >> PAGE_CACHE_SHIFT; tree_mod_log_read_lock(fs_info); tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { cur = container_of(node, struct tree_mod_elem, node); - if (cur->index < index) { + if (cur->logical < start) { node = node->rb_left; - } else if (cur->index > index) { + } else if (cur->logical > start) { node = node->rb_right; } else if (cur->seq < min_seq) { node = node->rb_left; @@ -1230,9 +1229,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, return NULL; /* - * the very last operation that's logged for a root is the replacement - * operation (if it is replaced at all). this has the index of the *new* - * root, making it the very first operation that's logged for this root. + * the very last operation that's logged for a root is the + * replacement operation (if it is replaced at all). this has + * the logical address of the *new* root, making it the very + * first operation that's logged for this root. */ while (1) { tm = tree_mod_log_search_oldest(fs_info, root_logical, @@ -1336,7 +1336,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!next) break; tm = container_of(next, struct tree_mod_elem, node); - if (tm->index != first_tm->index) + if (tm->logical != first_tm->logical) break; } tree_mod_log_read_unlock(fs_info); -- cgit v1.2.3 From e780b0d1c1523ec8cd489c6910fb8c5ee452bb6c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 18 Jan 2016 18:42:13 +0100 Subject: btrfs: send: use GFP_KERNEL everywhere The send operation is not on the critical writeback path we don't need to use GFP_NOFS for allocations. All error paths are handled and the whole operation is restartable. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 2 +- fs/btrfs/send.c | 36 ++++++++++++++++++------------------ 2 files changed, 19 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 769e0ff1b4ce..00741eb94651 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5361,7 +5361,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS); + tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL); if (!tmp_buf) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 63a6152be04b..d2e29925f1da 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -304,7 +304,7 @@ static struct fs_path *fs_path_alloc(void) { struct fs_path *p; - p = kmalloc(sizeof(*p), GFP_NOFS); + p = kmalloc(sizeof(*p), GFP_KERNEL); if (!p) return NULL; p->reversed = 0; @@ -363,11 +363,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) * First time the inline_buf does not suffice */ if (p->buf == p->inline_buf) { - tmp_buf = kmalloc(len, GFP_NOFS); + tmp_buf = kmalloc(len, GFP_KERNEL); if (tmp_buf) memcpy(tmp_buf, p->buf, old_buf_len); } else { - tmp_buf = krealloc(p->buf, len, GFP_NOFS); + tmp_buf = krealloc(p->buf, len, GFP_KERNEL); } if (!tmp_buf) return -ENOMEM; @@ -995,7 +995,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, * values are small. */ buf_len = PATH_MAX; - buf = kmalloc(buf_len, GFP_NOFS); + buf = kmalloc(buf_len, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto out; @@ -1042,7 +1042,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, buf = NULL; } else { char *tmp = krealloc(buf, buf_len, - GFP_NOFS | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!tmp) kfree(buf); @@ -1303,7 +1303,7 @@ static int find_extent_clone(struct send_ctx *sctx, /* We only use this path under the commit sem */ tmp_path->need_commit_sem = 0; - backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS); + backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL); if (!backref_ctx) { ret = -ENOMEM; goto out; @@ -1984,7 +1984,7 @@ static int name_cache_insert(struct send_ctx *sctx, nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)nce->ino); if (!nce_head) { - nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); + nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL); if (!nce_head) { kfree(nce); return -ENOMEM; @@ -2179,7 +2179,7 @@ out_cache: /* * Store the result of the lookup in the name cache. */ - nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS); + nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL); if (!nce) { ret = -ENOMEM; goto out; @@ -2315,7 +2315,7 @@ static int send_subvol_begin(struct send_ctx *sctx) if (!path) return -ENOMEM; - name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS); + name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL); if (!name) { btrfs_free_path(path); return -ENOMEM; @@ -2730,7 +2730,7 @@ static int __record_ref(struct list_head *head, u64 dir, { struct recorded_ref *ref; - ref = kmalloc(sizeof(*ref), GFP_NOFS); + ref = kmalloc(sizeof(*ref), GFP_KERNEL); if (!ref) return -ENOMEM; @@ -2755,7 +2755,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list) { struct recorded_ref *new; - new = kmalloc(sizeof(*ref), GFP_NOFS); + new = kmalloc(sizeof(*ref), GFP_KERNEL); if (!new) return -ENOMEM; @@ -2818,7 +2818,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) struct rb_node *parent = NULL; struct orphan_dir_info *entry, *odi; - odi = kmalloc(sizeof(*odi), GFP_NOFS); + odi = kmalloc(sizeof(*odi), GFP_KERNEL); if (!odi) return ERR_PTR(-ENOMEM); odi->ino = dir_ino; @@ -2973,7 +2973,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized) struct rb_node *parent = NULL; struct waiting_dir_move *entry, *dm; - dm = kmalloc(sizeof(*dm), GFP_NOFS); + dm = kmalloc(sizeof(*dm), GFP_KERNEL); if (!dm) return -ENOMEM; dm->ino = ino; @@ -3040,7 +3040,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, int exists = 0; int ret; - pm = kmalloc(sizeof(*pm), GFP_NOFS); + pm = kmalloc(sizeof(*pm), GFP_KERNEL); if (!pm) return -ENOMEM; pm->parent_ino = parent_ino; @@ -4280,7 +4280,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key, strncmp(name, ctx->name, name_len) == 0) { ctx->found_idx = num; ctx->found_data_len = data_len; - ctx->found_data = kmemdup(data, data_len, GFP_NOFS); + ctx->found_data = kmemdup(data, data_len, GFP_KERNEL); if (!ctx->found_data) return -ENOMEM; return 1; @@ -4481,7 +4481,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) while (index <= last_index) { unsigned cur_len = min_t(unsigned, len, PAGE_CACHE_SIZE - pg_offset); - page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); + page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); if (!page) { ret = -ENOMEM; break; @@ -5989,7 +5989,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) goto out; } - sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); + sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL); if (!sctx) { ret = -ENOMEM; goto out; @@ -5997,7 +5997,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) INIT_LIST_HEAD(&sctx->new_refs); INIT_LIST_HEAD(&sctx->deleted_refs); - INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); + INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); INIT_LIST_HEAD(&sctx->name_cache_list); sctx->flags = arg->flags; -- cgit v1.2.3 From 8f282f71eaee7ac979cdbe525f76daa0722798a8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 30 Mar 2016 16:01:12 +0200 Subject: btrfs: fallback to vmalloc in btrfs_compare_tree The allocation of node could fail if the memory is too fragmented for a given node size, practically observed with 64k. http://article.gmane.org/gmane.comp.file-systems.btrfs/54689 Reported-and-tested-by: Jean-Denis Girard Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 77592931ab4f..ec7928a27aaa 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -5361,10 +5362,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root, goto out; } - tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL); + tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL | __GFP_NOWARN); if (!tmp_buf) { - ret = -ENOMEM; - goto out; + tmp_buf = vmalloc(left_root->nodesize); + if (!tmp_buf) { + ret = -ENOMEM; + goto out; + } } left_path->search_commit_root = 1; @@ -5565,7 +5569,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, out: btrfs_free_path(left_path); btrfs_free_path(right_path); - kfree(tmp_buf); + kvfree(tmp_buf); return ret; } -- cgit v1.2.3