From 9b64f57ddf8673d29fafb3405d4aa1e93f5a4cd7 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 3 Mar 2017 10:55:11 +0200 Subject: btrfs: convert btrfs_transaction.use_count from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index eb1ee7b6f532..e4aa64c263f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4615,7 +4615,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) t = list_first_entry(&fs_info->trans_list, struct btrfs_transaction, list); if (t->state >= TRANS_STATE_COMMIT_START) { - atomic_inc(&t->use_count); + refcount_inc(&t->use_count); spin_unlock(&fs_info->trans_lock); btrfs_wait_for_commit(fs_info, t->transid); btrfs_put_transaction(t); -- cgit v1.2.3 From 6df8cdf5bda221f268ac23940bce589ad176993d Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 3 Mar 2017 10:55:15 +0200 Subject: btrfs: convert btrfs_delayed_ref_node.refs from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David Sterba --- fs/btrfs/backref.c | 2 +- fs/btrfs/delayed-ref.c | 8 ++++---- fs/btrfs/delayed-ref.h | 8 +++++--- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 6 +++--- 5 files changed, 14 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7699e16784d3..116338344224 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1286,7 +1286,7 @@ again: head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 6eb80952efb3..be70d90dfee5 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -164,7 +164,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, if (mutex_trylock(&head->mutex)) return 0; - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); @@ -590,7 +590,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = count_mod; @@ -682,7 +682,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, delayed_refs = &trans->transaction->delayed_refs; /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = 1; @@ -739,7 +739,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, seq = atomic64_read(&fs_info->tree_mod_seq); /* first set the basic ref node struct up */ - atomic_set(&ref->refs, 1); + 
refcount_set(&ref->refs, 1); ref->bytenr = bytenr; ref->num_bytes = num_bytes; ref->ref_mod = 1; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 0e537f98f1a1..c0264ff01b53 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -18,6 +18,8 @@ #ifndef __DELAYED_REF__ #define __DELAYED_REF__ +#include + /* these are the possible values of struct btrfs_delayed_ref_node->action */ #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ @@ -53,7 +55,7 @@ struct btrfs_delayed_ref_node { u64 seq; /* ref count on this data structure */ - atomic_t refs; + refcount_t refs; /* * how many refs is this entry adding or deleting. For @@ -220,8 +222,8 @@ btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) { - WARN_ON(atomic_read(&ref->refs) == 0); - if (atomic_dec_and_test(&ref->refs)) { + WARN_ON(refcount_read(&ref->refs) == 0); + if (refcount_dec_and_test(&ref->refs)) { WARN_ON(ref->in_tree); switch (ref->type) { case BTRFS_TREE_BLOCK_REF_KEY: diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e4aa64c263f9..3748bc54a6ab 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4343,7 +4343,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, head = rb_entry(node, struct btrfs_delayed_ref_head, href_node); if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); mutex_lock(&head->mutex); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5c84eea60703..5a7ddcff406b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -892,7 +892,7 @@ search_again: head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); 
spin_unlock(&delayed_refs->lock); btrfs_release_path(path); @@ -2980,7 +2980,7 @@ again: struct btrfs_delayed_ref_node *ref; ref = &head->node; - atomic_inc(&ref->refs); + refcount_inc(&ref->refs); spin_unlock(&delayed_refs->lock); /* @@ -3057,7 +3057,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, } if (!mutex_trylock(&head->mutex)) { - atomic_inc(&head->node.refs); + refcount_inc(&head->node.refs); spin_unlock(&delayed_refs->lock); btrfs_release_path(path); -- cgit v1.2.3 From 0700cea7c8b387c8c6bc4de79b197baa0b3fc4a3 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 3 Mar 2017 10:55:18 +0200 Subject: btrfs: convert btrfs_root.refs from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/disk-io.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cdfc2a46448b..285566cc2f7d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1222,7 +1222,7 @@ struct btrfs_root { dev_t anon_dev; spinlock_t root_item_lock; - atomic_t refs; + refcount_t refs; struct mutex delalloc_mutex; spinlock_t delalloc_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3748bc54a6ab..bd415e1dd114 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1340,7 +1340,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); atomic_set(&root->orphan_inodes, 0); - atomic_set(&root->refs, 1); + refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshoted, 0); 
atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2e0ec29bfd69..21f1ceb85b76 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -101,14 +101,14 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); */ static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root) { - if (atomic_inc_not_zero(&root->refs)) + if (refcount_inc_not_zero(&root->refs)) return root; return NULL; } static inline void btrfs_put_fs_root(struct btrfs_root *root) { - if (atomic_dec_and_test(&root->refs)) + if (refcount_dec_and_test(&root->refs)) kfree(root); } -- cgit v1.2.3 From 7ef70b4d9987a78d39c4e40a02093493333e5408 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 2 Mar 2017 18:54:52 +0100 Subject: btrfs: preallocate radix tree node for global readahead tree We can preallocate the node so insertion does not have to do that under the lock. The GFP flags for the global radix tree are initialized to GFP_NOFS & ~__GFP_DIRECT_RECLAIM but we can use GFP_KERNEL, because readahead is optional and not on any critical writeout path. 
Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/reada.c | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bd415e1dd114..12eba54877c2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2693,7 +2693,7 @@ int open_ctree(struct super_block *sb, fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ - INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_KERNEL); spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index fdae8ca79401..a7fa4a5cb296 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -393,6 +393,10 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, goto error; } + ret = radix_tree_preload(GFP_KERNEL); + if (ret) + goto error; + /* insert extent in reada_tree + all per-device trees, all or nothing */ btrfs_dev_replace_lock(&fs_info->dev_replace, 0); spin_lock(&fs_info->reada_lock); @@ -402,13 +406,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info, re_exist->refcnt++; spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + radix_tree_preload_end(); goto error; } if (ret) { spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); + radix_tree_preload_end(); goto error; } + radix_tree_preload_end(); prev_dev = NULL; dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( &fs_info->dev_replace); -- cgit v1.2.3 From d48d71aa9977473b6515bb48933617a06cdc7be9 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 2 Mar 2017 19:43:30 +0100 Subject: btrfs: remove redundant parameter from btree_readahead_hook We can read fs_info from eb. 
Reviewed-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 3 +-- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/reada.c | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 285566cc2f7d..7343bf7f2b35 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3672,8 +3672,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); void btrfs_reada_detach(void *handle); -int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int err); +int btree_readahead_hook(struct extent_buffer *eb, int err); static inline int is_fstree(u64 rootid) { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 12eba54877c2..a9314fe494c9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -762,7 +762,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(fs_info, eb, ret); + btree_readahead_hook(eb, ret); if (ret) { /* @@ -787,7 +787,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) - btree_readahead_hook(eb->fs_info, eb, -EIO); + btree_readahead_hook(eb, -EIO); return -EIO; /* we fixed nothing */ } diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index a7fa4a5cb296..306e5108aac7 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -209,9 +209,9 @@ cleanup: return; } -int btree_readahead_hook(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int err) +int btree_readahead_hook(struct extent_buffer *eb, int err) { + struct btrfs_fs_info *fs_info = eb->fs_info; int ret = 0; struct reada_extent *re; -- cgit v1.2.3 From 13e88e1560d6014838e2dd9f8b9cf8ec9a8d86e6 Mon Sep 17 00:00:00 2001 From: 
Anand Jain Date: Thu, 6 Apr 2017 11:22:52 +0800 Subject: btrfs: delete unused member nobarriers The last consumer of nobarriers was removed by commit [1] and sync won't fail with EOPNOTSUPP anymore. Thus, when the write cache is write-through, the flush now just returns success without actually passing such a request down to the block device/lun. [1] commit b25de9d6da49b1a8760a89672283128aa8c78345 block: remove BIO_EOPNOTSUPP And, as the device/lun write cache state may change dynamically, saving such a state won't help either. So delete the member nobarriers. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 --- fs/btrfs/volumes.h | 1 - 2 files changed, 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a9314fe494c9..e070e463ad8b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3521,9 +3521,6 @@ static int write_dev_flush(struct btrfs_device *device, int wait) struct bio *bio; int ret = 0; - if (device->nobarriers) - return 0; - if (wait) { bio = device->flush_bio; if (!bio) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index def39b5a8d9e..c7d0fbc915ca 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -123,7 +123,6 @@ struct btrfs_device { struct list_head resized_list; /* for sending down flush barriers */ - int nobarriers; struct bio *flush_bio; struct completion flush_wait; -- cgit v1.2.3 From c2a9c7ab475bc3aaf06521a39ac65bc48c8cad4f Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Thu, 6 Apr 2017 11:22:53 +0800 Subject: btrfs: check if the device is flush capable The block layer call chain from submit_bio will check if the write cache is enabled for the given queue before submitting the flush. This adds code to fail fast if it's not.
Signed-off-by: Anand Jain Reviewed-by: David Sterba [ updated changelog to reflect current code state, blkdev_issue_flush is not used yet ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e070e463ad8b..ba7bd65693a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3518,9 +3518,13 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static int write_dev_flush(struct btrfs_device *device, int wait) { + struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio; int ret = 0; + if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) + return 0; + if (wait) { bio = device->flush_bio; if (!bio) -- cgit v1.2.3 From 9bcaaea7418d09691f1ffab5c49aacafe3eef9d0 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 4 May 2017 16:08:08 -0700 Subject: btrfs: fix the gfp_mask for the reada_zones radix tree Commits cc8385b59e17 and 7ef70b4d9987a7 added preallocation for the reada radix trees and also switched them over to GFP_KERNEL for the default gfp mask. Since we're doing radix tree insertions under spinlocks, we need to make sure the mask doesn't allow sleeping. This fix keeps the radix preallocation but switches back to the original gfp_mask.
Reported-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ba7bd65693a3..683194242deb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2693,7 +2693,7 @@ int open_ctree(struct super_block *sb, fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */ /* readahead state */ - INIT_RADIX_TREE(&fs_info->reada_tree, GFP_KERNEL); + INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); spin_lock_init(&fs_info->reada_lock); fs_info->thread_pool_size = min_t(unsigned long, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 6cad3233181c..017b67daa3bb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -252,7 +252,7 @@ static struct btrfs_device *__alloc_device(void) atomic_set(&dev->reada_in_flight, 0); atomic_set(&dev->dev_stats_ccnt, 0); btrfs_device_data_ordered_init(dev); - INIT_RADIX_TREE(&dev->reada_zones, GFP_KERNEL); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); return dev; -- cgit v1.2.3