summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Kconfig22
-rw-r--r--fs/btrfs/backref.c88
-rw-r--r--fs/btrfs/backref.h3
-rw-r--r--fs/btrfs/btrfs_inode.h2
-rw-r--r--fs/btrfs/check-integrity.c2
-rw-r--r--fs/btrfs/compression.c14
-rw-r--r--fs/btrfs/compression.h2
-rw-r--r--fs/btrfs/ctree.c374
-rw-r--r--fs/btrfs/ctree.h153
-rw-r--r--fs/btrfs/delayed-inode.c217
-rw-r--r--fs/btrfs/delayed-inode.h2
-rw-r--r--fs/btrfs/delayed-ref.c30
-rw-r--r--fs/btrfs/delayed-ref.h1
-rw-r--r--fs/btrfs/dev-replace.c5
-rw-r--r--fs/btrfs/dir-item.c11
-rw-r--r--fs/btrfs/disk-io.c492
-rw-r--r--fs/btrfs/disk-io.h5
-rw-r--r--fs/btrfs/extent-tree.c720
-rw-r--r--fs/btrfs/extent_io.c486
-rw-r--r--fs/btrfs/extent_io.h48
-rw-r--r--fs/btrfs/extent_map.c23
-rw-r--r--fs/btrfs/extent_map.h3
-rw-r--r--fs/btrfs/file-item.c108
-rw-r--r--fs/btrfs/file.c51
-rw-r--r--fs/btrfs/free-space-cache.c639
-rw-r--r--fs/btrfs/free-space-cache.h7
-rw-r--r--fs/btrfs/inode-item.c17
-rw-r--r--fs/btrfs/inode-map.c8
-rw-r--r--fs/btrfs/inode.c297
-rw-r--r--fs/btrfs/ioctl.c136
-rw-r--r--fs/btrfs/locking.c4
-rw-r--r--fs/btrfs/locking.h1
-rw-r--r--fs/btrfs/ordered-data.c30
-rw-r--r--fs/btrfs/ordered-data.h3
-rw-r--r--fs/btrfs/print-tree.c9
-rw-r--r--fs/btrfs/print-tree.h2
-rw-r--r--fs/btrfs/qgroup.c853
-rw-r--r--fs/btrfs/raid56.c16
-rw-r--r--fs/btrfs/reada.c5
-rw-r--r--fs/btrfs/relocation.c186
-rw-r--r--fs/btrfs/root-tree.c7
-rw-r--r--fs/btrfs/scrub.c143
-rw-r--r--fs/btrfs/send.c42
-rw-r--r--fs/btrfs/send.h1
-rw-r--r--fs/btrfs/super.c109
-rw-r--r--fs/btrfs/transaction.c171
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/tree-log.c443
-rw-r--r--fs/btrfs/tree-log.h3
-rw-r--r--fs/btrfs/ulist.c58
-rw-r--r--fs/btrfs/ulist.h6
-rw-r--r--fs/btrfs/volumes.c186
-rw-r--r--fs/btrfs/volumes.h33
-rw-r--r--fs/btrfs/xattr.c4
54 files changed, 3969 insertions, 2315 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 9a8622a5b867..2b3b83296977 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -1,5 +1,5 @@
config BTRFS_FS
- tristate "Btrfs filesystem Unstable disk format"
+ tristate "Btrfs filesystem support"
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
@@ -52,3 +52,23 @@ config BTRFS_FS_CHECK_INTEGRITY
In most cases, unless you are a btrfs developer who needs
to verify the integrity of (super)-block write requests
during the run of a regression test, say N
+
+config BTRFS_FS_RUN_SANITY_TESTS
+ bool "Btrfs will run sanity tests upon loading"
+ depends on BTRFS_FS
+ help
+ This will run some basic sanity tests on the free space cache
+ code to make sure it is acting as it should. These are mostly
+ regression tests and are only really interesting to btrfs devlopers.
+
+ If unsure, say N.
+
+config BTRFS_DEBUG
+ bool "Btrfs debugging support"
+ depends on BTRFS_FS
+ help
+ Enable run-time debugging support for the btrfs filesystem. This may
+ enable additional and expensive checks with negative impact on
+ performance, or export extra information via sysfs.
+
+ If unsure, say N.
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index bd605c87adfd..290e347b6db3 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -352,6 +352,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
err = __resolve_indirect_ref(fs_info, search_commit_root,
time_seq, ref, parents,
extent_item_pos);
+ if (err == -ENOMEM)
+ goto out;
if (err)
continue;
@@ -367,7 +369,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
if (!new_ref) {
ret = -ENOMEM;
- break;
+ goto out;
}
memcpy(new_ref, ref, sizeof(*ref));
new_ref->parent = node->val;
@@ -377,7 +379,7 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
}
ulist_reinit(parents);
}
-
+out:
ulist_free(parents);
return ret;
}
@@ -421,7 +423,10 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
BUG_ON(!ref->wanted_disk_byte);
eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
fs_info->tree_root->leafsize, 0);
- BUG_ON(!eb);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ return -EIO;
+ }
btrfs_tree_read_lock(eb);
if (btrfs_header_level(eb) == 0)
btrfs_item_key_to_cpu(eb, &ref->key_for_search, 0);
@@ -443,7 +448,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
* having a parent).
* mode = 2: merge identical parents
*/
-static int __merge_refs(struct list_head *head, int mode)
+static void __merge_refs(struct list_head *head, int mode)
{
struct list_head *pos1;
@@ -489,7 +494,6 @@ static int __merge_refs(struct list_head *head, int mode)
}
}
- return 0;
}
/*
@@ -582,7 +586,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
default:
WARN_ON(1);
}
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
return 0;
@@ -680,7 +685,8 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
default:
WARN_ON(1);
}
- BUG_ON(ret);
+ if (ret)
+ return ret;
ptr += btrfs_extent_inline_ref_size(type);
}
@@ -762,7 +768,9 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
default:
WARN_ON(1);
}
- BUG_ON(ret);
+ if (ret)
+ return ret;
+
}
return ret;
@@ -880,18 +888,14 @@ again:
if (ret)
goto out;
- ret = __merge_refs(&prefs, 1);
- if (ret)
- goto out;
+ __merge_refs(&prefs, 1);
ret = __resolve_indirect_refs(fs_info, search_commit_root, time_seq,
&prefs, extent_item_pos);
if (ret)
goto out;
- ret = __merge_refs(&prefs, 2);
- if (ret)
- goto out;
+ __merge_refs(&prefs, 2);
while (!list_empty(&prefs)) {
ref = list_first_entry(&prefs, struct __prelim_ref, list);
@@ -900,7 +904,8 @@ again:
if (ref->count && ref->root_id && ref->parent == 0) {
/* no parent == root of tree */
ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS);
- BUG_ON(ret < 0);
+ if (ret < 0)
+ goto out;
}
if (ref->count && ref->parent) {
struct extent_inode_elem *eie = NULL;
@@ -911,7 +916,11 @@ again:
info_level);
eb = read_tree_block(fs_info->extent_root,
ref->parent, bsz, 0);
- BUG_ON(!eb);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ ret = -EIO;
+ goto out;
+ }
ret = find_extent_in_eb(eb, bytenr,
*extent_item_pos, &eie);
ref->inode_list = eie;
@@ -920,6 +929,8 @@ again:
ret = ulist_add_merge(refs, ref->parent,
(uintptr_t)ref->inode_list,
(u64 *)&eie, GFP_NOFS);
+ if (ret < 0)
+ goto out;
if (!ret && extent_item_pos) {
/*
* we've recorded that parent, so we must extend
@@ -930,7 +941,6 @@ again:
eie = eie->next;
eie->next = ref->inode_list;
}
- BUG_ON(ret < 0);
}
kfree(ref);
}
@@ -1180,6 +1190,20 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
return ret;
}
+/*
+ * this iterates to turn a name (from iref/extref) into a full filesystem path.
+ * Elements of the path are separated by '/' and the path is guaranteed to be
+ * 0-terminated. the path is only given within the current file system.
+ * Therefore, it never starts with a '/'. the caller is responsible to provide
+ * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
+ * the start point of the resulting string is returned. this pointer is within
+ * dest, normally.
+ * in case the path buffer would overflow, the pointer is decremented further
+ * as if output was written to the buffer, though no more output is actually
+ * generated. that way, the caller can determine how much space would be
+ * required for the path to fit into the buffer. in that case, the returned
+ * value will be smaller than dest. callers must check this!
+ */
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
@@ -1249,32 +1273,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
}
/*
- * this iterates to turn a btrfs_inode_ref into a full filesystem path. elements
- * of the path are separated by '/' and the path is guaranteed to be
- * 0-terminated. the path is only given within the current file system.
- * Therefore, it never starts with a '/'. the caller is responsible to provide
- * "size" bytes in "dest". the dest buffer will be filled backwards. finally,
- * the start point of the resulting string is returned. this pointer is within
- * dest, normally.
- * in case the path buffer would overflow, the pointer is decremented further
- * as if output was written to the buffer, though no more output is actually
- * generated. that way, the caller can determine how much space would be
- * required for the path to fit into the buffer. in that case, the returned
- * value will be smaller than dest. callers must check this!
- */
-char *btrfs_iref_to_path(struct btrfs_root *fs_root,
- struct btrfs_path *path,
- struct btrfs_inode_ref *iref,
- struct extent_buffer *eb_in, u64 parent,
- char *dest, u32 size)
-{
- return btrfs_ref_to_path(fs_root, path,
- btrfs_inode_ref_name_len(eb_in, iref),
- (unsigned long)(iref + 1),
- eb_in, parent, dest, size);
-}
-
-/*
* this makes the path point to (logical EXTENT_ITEM *)
* returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
* tree blocks and <0 on error.
@@ -1461,8 +1459,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
iterate_extent_inodes_t *iterate, void *ctx)
{
int ret;
- struct list_head data_refs = LIST_HEAD_INIT(data_refs);
- struct list_head shared_refs = LIST_HEAD_INIT(shared_refs);
struct btrfs_trans_handle *trans;
struct ulist *refs = NULL;
struct ulist *roots = NULL;
@@ -1508,11 +1504,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
iterate, ctx);
}
ulist_free(roots);
- roots = NULL;
}
free_leaf_list(refs);
- ulist_free(roots);
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 310a7f6d09b1..0f446d7ca2c0 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
-char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
- struct btrfs_inode_ref *iref, struct extent_buffer *eb,
- u64 parent, char *dest, u32 size);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d9b97d4960e6..08b286b2a2c5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -93,7 +93,7 @@ struct btrfs_inode {
unsigned long runtime_flags;
- /* Keep track of who's O_SYNC/fsycing currently */
+ /* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 18af6f48781a..1431a6965017 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
unsigned int j;
DECLARE_COMPLETION_ONSTACK(complete);
- bio = bio_alloc(GFP_NOFS, num_pages - i);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
printk(KERN_INFO
"btrfsic: bio_alloc() for %u pages failed!\n",
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 15b94089abc4..b189bd1e7a3e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -82,6 +82,10 @@ struct compressed_bio {
u32 sums;
};
+static int btrfs_decompress_biovec(int type, struct page **pages_in,
+ u64 disk_start, struct bio_vec *bvec,
+ int vcnt, size_t srclen);
+
static inline int compressed_bio_size(struct btrfs_root *root,
unsigned long disk_size)
{
@@ -106,7 +110,6 @@ static int check_compressed_csum(struct inode *inode,
u64 disk_start)
{
int ret;
- struct btrfs_root *root = BTRFS_I(inode)->root;
struct page *page;
unsigned long i;
char *kaddr;
@@ -121,7 +124,7 @@ static int check_compressed_csum(struct inode *inode,
csum = ~(u32)0;
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
+ csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
btrfs_csum_final(csum, (char *)&csum);
kunmap_atomic(kaddr);
@@ -739,7 +742,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
-struct btrfs_compress_op *btrfs_compress_op[] = {
+static struct btrfs_compress_op *btrfs_compress_op[] = {
&btrfs_zlib_compress,
&btrfs_lzo_compress,
};
@@ -910,8 +913,9 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
* be contiguous. They all correspond to the range of bytes covered by
* the compressed extent.
*/
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
- struct bio_vec *bvec, int vcnt, size_t srclen)
+static int btrfs_decompress_biovec(int type, struct page **pages_in,
+ u64 disk_start, struct bio_vec *bvec,
+ int vcnt, size_t srclen)
{
struct list_head *workspace;
int ret;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 9afb0a62ae82..0c803b4fbf93 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -30,8 +30,6 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
unsigned long *total_in,
unsigned long *total_out,
unsigned long max_out);
-int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
- struct bio_vec *bvec, int vcnt, size_t srclen);
int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ecd25a1b4e51..17dffe33e8d0 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,16 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot);
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+ int level, int slot);
static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
- u32 blocksize, u64 parent_transid,
- u64 time_seq);
-struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- u64 time_seq);
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -208,7 +203,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
-struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
@@ -361,6 +356,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
+ * Increment the upper half of tree_mod_seq, set lower half zero.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
+{
+ u64 seq = atomic64_read(&fs_info->tree_mod_seq);
+ seq &= 0xffffffff00000000ull;
+ seq += 1ull << 32;
+ atomic64_set(&fs_info->tree_mod_seq, seq);
+ return seq;
+}
+
+/*
+ * Increment the lower half of tree_mod_seq.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
+ * are generated should not technically require a spin lock here. (Rationale:
+ * incrementing the minor while incrementing the major seq number is between its
+ * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
+ * just returns a unique sequence number as usual.) We have decided to leave
+ * that requirement in here and rethink it once we notice it really imposes a
+ * problem on some workload.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+{
+ return atomic64_inc_return(&fs_info->tree_mod_seq);
+}
+
+/*
+ * return the last minor in the previous major tree_mod_seq number
+ */
+u64 btrfs_tree_mod_seq_prev(u64 seq)
+{
+ return (seq & 0xffffffff00000000ull) - 1ull;
+}
+
+/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -376,10 +409,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
- elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+ elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq(fs_info);
+ seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
@@ -524,7 +557,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
if (!tm)
return -ENOMEM;
- tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
return tm->seq;
}
@@ -643,7 +679,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
- struct extent_buffer *new_root, gfp_t flags)
+ struct extent_buffer *new_root, gfp_t flags,
+ int log_removal)
{
struct tree_mod_elem *tm;
int ret;
@@ -651,6 +688,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
if (tree_mod_dont_log(fs_info, NULL))
return 0;
+ if (log_removal)
+ __tree_mod_log_free_eb(fs_info, old_root);
+
ret = tree_mod_alloc(fs_info, flags, &tm);
if (ret < 0)
goto out;
@@ -751,8 +791,8 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
for (i = 0; i < nr_items; i++) {
ret = tree_mod_log_insert_key_locked(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE);
+ i + src_offset,
+ MOD_LOG_KEY_REMOVE);
BUG_ON(ret < 0);
ret = tree_mod_log_insert_key_locked(fs_info, dst,
i + dst_offset,
@@ -798,11 +838,12 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
static noinline void
tree_mod_log_set_root_pointer(struct btrfs_root *root,
- struct extent_buffer *new_root_node)
+ struct extent_buffer *new_root_node,
+ int log_removal)
{
int ret;
ret = tree_mod_log_insert_root(root->fs_info, root->node,
- new_root_node, GFP_NOFS);
+ new_root_node, GFP_NOFS, log_removal);
BUG_ON(ret < 0);
}
@@ -863,7 +904,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (btrfs_block_can_be_shared(root, buf)) {
ret = btrfs_lookup_extent_info(trans, root, buf->start,
- buf->len, &refs, &flags);
+ btrfs_header_level(buf), 1,
+ &refs, &flags);
if (ret)
return ret;
if (refs == 0) {
@@ -909,10 +951,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
+ int level = btrfs_header_level(buf);
+
ret = btrfs_set_disk_extent_flags(trans, root,
buf->start,
buf->len,
- new_flags, 0);
+ new_flags, level, 0);
if (ret)
return ret;
}
@@ -927,7 +971,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret); /* -ENOMEM */
}
- tree_mod_log_free_eb(root->fs_info, buf);
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
@@ -1025,7 +1068,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = 0;
extent_buffer_get(cow);
- tree_mod_log_set_root_pointer(root, cow);
+ tree_mod_log_set_root_pointer(root, cow, 1);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1046,6 +1089,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
+ tree_mod_log_free_eb(root->fs_info, buf);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -1063,11 +1107,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
*/
static struct tree_mod_elem *
__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
- struct btrfs_root *root, u64 time_seq)
+ struct extent_buffer *eb_root, u64 time_seq)
{
struct tree_mod_elem *tm;
struct tree_mod_elem *found = NULL;
- u64 root_logical = root->node->start;
+ u64 root_logical = eb_root->start;
int looped = 0;
if (!time_seq)
@@ -1101,7 +1145,6 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
found = tm;
root_logical = tm->old_root.logical;
- BUG_ON(root_logical == root->node->start);
looped = 1;
}
@@ -1186,6 +1229,13 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
btrfs_set_header_nritems(eb, n);
}
+/*
+ * Called with eb read locked. If the buffer cannot be rewinded, the same buffer
+ * is returned. If rewind operations happen, a fresh buffer is returned. The
+ * returned buffer is always read-locked. If the returned buffer is not the
+ * input buffer, the lock on the input buffer is released and the input buffer
+ * is freed (its refcount is decremented).
+ */
static struct extent_buffer *
tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
u64 time_seq)
@@ -1219,8 +1269,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
}
extent_buffer_get(eb_rewin);
+ btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
+ extent_buffer_get(eb_rewin);
+ btrfs_tree_read_lock(eb_rewin);
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
@@ -1239,33 +1292,35 @@ static inline struct extent_buffer *
get_old_root(struct btrfs_root *root, u64 time_seq)
{
struct tree_mod_elem *tm;
- struct extent_buffer *eb;
+ struct extent_buffer *eb = NULL;
+ struct extent_buffer *eb_root;
struct extent_buffer *old;
struct tree_mod_root *old_root = NULL;
u64 old_generation = 0;
u64 logical;
u32 blocksize;
- eb = btrfs_read_lock_root_node(root);
- tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ eb_root = btrfs_read_lock_root_node(root);
+ tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
if (!tm)
- return root->node;
+ return eb_root;
if (tm->op == MOD_LOG_ROOT_REPLACE) {
old_root = &tm->old_root;
old_generation = tm->generation;
logical = old_root->logical;
} else {
- logical = root->node->start;
+ logical = eb_root->start;
}
tm = tree_mod_log_search(root->fs_info, logical, time_seq);
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (!old) {
+ if (!old || !extent_buffer_uptodate(old)) {
+ free_extent_buffer(old);
pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
logical);
WARN_ON(1);
@@ -1274,13 +1329,13 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(old);
}
} else if (old_root) {
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
} else {
- eb = btrfs_clone_extent_buffer(root->node);
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ eb = btrfs_clone_extent_buffer(eb_root);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
}
if (!eb)
@@ -1290,7 +1345,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(eb, root->root_key.objectid);
+ btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
@@ -1307,15 +1362,15 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
{
struct tree_mod_elem *tm;
int level;
+ struct extent_buffer *eb_root = btrfs_root_node(root);
- tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
level = tm->old_root.level;
} else {
- rcu_read_lock();
- level = btrfs_header_level(root->node);
- rcu_read_unlock();
+ level = btrfs_header_level(eb_root);
}
+ free_extent_buffer(eb_root);
return level;
}
@@ -1510,8 +1565,10 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
if (!cur) {
cur = read_tree_block(root, blocknr,
blocksize, gen);
- if (!cur)
+ if (!cur || !extent_buffer_uptodate(cur)) {
+ free_extent_buffer(cur);
return -EIO;
+ }
} else if (!uptodate) {
err = btrfs_read_buffer(cur, gen);
if (err) {
@@ -1676,6 +1733,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
struct extent_buffer *parent, int slot)
{
int level = btrfs_header_level(parent);
+ struct extent_buffer *eb;
+
if (slot < 0)
return NULL;
if (slot >= btrfs_header_nritems(parent))
@@ -1683,9 +1742,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
BUG_ON(level == 0);
- return read_tree_block(root, btrfs_node_blockptr(parent, slot),
- btrfs_level_size(root, level - 1),
- btrfs_node_ptr_generation(parent, slot));
+ eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
+ btrfs_level_size(root, level - 1),
+ btrfs_node_ptr_generation(parent, slot));
+ if (eb && !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ eb = NULL;
+ }
+
+ return eb;
}
/*
@@ -1750,8 +1815,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
- tree_mod_log_free_eb(root->fs_info, root->node);
- tree_mod_log_set_root_pointer(root, child);
+ tree_mod_log_set_root_pointer(root, child, 1);
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@@ -1815,7 +1879,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- del_ptr(trans, root, path, level + 1, pslot + 1);
+ del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
free_extent_buffer_stale(right);
@@ -1859,7 +1923,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- del_ptr(trans, root, path, level + 1, pslot);
+ del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
free_extent_buffer_stale(mid);
@@ -2207,9 +2271,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
int no_skips = 0;
struct extent_buffer *t;
- if (path->really_keep_locks)
- return;
-
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
break;
@@ -2257,7 +2318,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
int i;
- if (path->keep_locks || path->really_keep_locks)
+ if (path->keep_locks)
return;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2490,7 +2551,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
if (!cow)
write_lock_level = -1;
- if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level))
+ if (cow && (p->keep_locks || p->lowest_level))
write_lock_level = BTRFS_MAX_LEVEL;
min_write_lock_level = write_lock_level;
@@ -2792,15 +2853,9 @@ again:
btrfs_clear_path_blocking(p, b,
BTRFS_READ_LOCK);
}
+ b = tree_mod_log_rewind(root->fs_info, b, time_seq);
p->locks[level] = BTRFS_READ_LOCK;
p->nodes[level] = b;
- b = tree_mod_log_rewind(root->fs_info, b, time_seq);
- if (b != p->nodes[level]) {
- btrfs_tree_unlock_rw(p->nodes[level],
- p->locks[level]);
- p->locks[level] = 0;
- p->nodes[level] = b;
- }
} else {
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
@@ -2899,8 +2954,7 @@ again:
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
{
int i;
@@ -2925,8 +2979,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
@@ -2948,7 +3001,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
btrfs_set_item_key(eb, &disk_key, slot);
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
/*
@@ -3090,7 +3143,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
*/
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
- struct btrfs_path *path, int level)
+ struct btrfs_path *path, int level, int log_removal)
{
u64 lower_gen;
struct extent_buffer *lower;
@@ -3141,7 +3194,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
old = root->node;
- tree_mod_log_set_root_pointer(root, c);
+ tree_mod_log_set_root_pointer(root, c, log_removal);
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@@ -3222,8 +3275,17 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
c = path->nodes[level];
WARN_ON(btrfs_header_generation(c) != trans->transid);
if (c == root->node) {
- /* trying to split the root, lets make a new one */
- ret = insert_new_root(trans, root, path, level + 1);
+ /*
+ * trying to split the root, lets make a new one
+ *
+ * tree mod log: We pass 0 as log_removal parameter to
+ * insert_new_root, because that root buffer will be kept as a
+ * normal node. We are going to log removal of half of the
+ * elements below with tree_mod_log_eb_copy. We're holding a
+ * tree lock on the buffer, which is why we cannot race with
+ * other tree_mod_log users.
+ */
+ ret = insert_new_root(trans, root, path, level + 1, 0);
if (ret)
return ret;
} else {
@@ -3677,7 +3739,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -3943,7 +4005,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
}
if (!path->nodes[1]) {
- ret = insert_new_root(trans, root, path, 1);
+ ret = insert_new_root(trans, root, path, 1, 1);
if (ret)
return ret;
}
@@ -4037,8 +4099,7 @@ again:
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(trans, root, path,
- &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
btrfs_mark_buffer_dirty(right);
return ret;
@@ -4254,7 +4315,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
return ret;
path->slots[0]++;
- setup_items_for_insert(trans, root, path, new_key, &item_size,
+ setup_items_for_insert(root, path, new_key, &item_size,
item_size, item_size +
sizeof(struct btrfs_item), 1);
leaf = path->nodes[0];
@@ -4271,9 +4332,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
+void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
u32 new_size, int from_end)
{
int slot;
@@ -4357,7 +4416,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
item = btrfs_item_nr(leaf, slot);
@@ -4371,10 +4430,9 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
}
/*
- * make the item pointed to by the path bigger, data_size is the new size.
+ * make the item pointed to by the path bigger, data_size is the added size.
*/
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
u32 data_size)
{
int slot;
@@ -4444,8 +4502,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
* to save stack depth by doing the bulk of the work in a function
* that doesn't call btrfs_search_slot
*/
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr)
{
@@ -4521,7 +4578,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(leaf);
@@ -4561,7 +4618,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
slot = path->slots[0];
BUG_ON(slot < 0);
- setup_items_for_insert(trans, root, path, cpu_key, data_size,
+ setup_items_for_insert(root, path, cpu_key, data_size,
total_data, total_size, nr);
return 0;
}
@@ -4599,8 +4656,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
* the tree should have been previously balanced so the deletion does not
* empty a node.
*/
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot)
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+ int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
@@ -4632,7 +4689,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, level + 1);
+ fixup_low_keys(root, path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
}
@@ -4653,7 +4710,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf)
{
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- del_ptr(trans, root, path, 1, path->slots[1]);
+ del_ptr(root, path, 1, path->slots[1]);
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -4734,7 +4791,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
@@ -5454,139 +5511,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
return btrfs_next_old_leaf(root, path, 0);
}
-/* Release the path up to but not including the given level */
-static void btrfs_release_level(struct btrfs_path *path, int level)
-{
- int i;
-
- for (i = 0; i < level; i++) {
- path->slots[i] = 0;
- if (!path->nodes[i])
- continue;
- if (path->locks[i]) {
- btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
- path->locks[i] = 0;
- }
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
-}
-
-/*
- * This function assumes 2 things
- *
- * 1) You are using path->keep_locks
- * 2) You are not inserting items.
- *
- * If either of these are not true do not use this function. If you need a next
- * leaf with either of these not being true then this function can be easily
- * adapted to do that, but at the moment these are the limitations.
- */
-int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- int del)
-{
- struct extent_buffer *b;
- struct btrfs_key key;
- u32 nritems;
- int level = 1;
- int slot;
- int ret = 1;
- int write_lock_level = BTRFS_MAX_LEVEL;
- int ins_len = del ? -1 : 0;
-
- WARN_ON(!(path->keep_locks || path->really_keep_locks));
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
-
- while (path->nodes[level]) {
- nritems = btrfs_header_nritems(path->nodes[level]);
- if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
-search:
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &key, path,
- ins_len, 1);
- if (ret < 0)
- goto out;
- level = 1;
- continue;
- }
-
- if (path->slots[level] >= nritems - 1) {
- level++;
- continue;
- }
-
- btrfs_release_level(path, level);
- break;
- }
-
- if (!path->nodes[level]) {
- ret = 1;
- goto out;
- }
-
- path->slots[level]++;
- b = path->nodes[level];
-
- while (b) {
- level = btrfs_header_level(b);
-
- if (!should_cow_block(trans, root, b))
- goto cow_done;
-
- btrfs_set_path_blocking(path);
- ret = btrfs_cow_block(trans, root, b,
- path->nodes[level + 1],
- path->slots[level + 1], &b);
- if (ret)
- goto out;
-cow_done:
- path->nodes[level] = b;
- btrfs_clear_path_blocking(path, NULL, 0);
- if (level != 0) {
- ret = setup_nodes_for_search(trans, root, path, b,
- level, ins_len,
- &write_lock_level);
- if (ret == -EAGAIN)
- goto search;
- if (ret)
- goto out;
-
- b = path->nodes[level];
- slot = path->slots[level];
-
- ret = read_block_for_search(trans, root, path,
- &b, level, slot, &key, 0);
- if (ret == -EAGAIN)
- goto search;
- if (ret)
- goto out;
- level = btrfs_header_level(b);
- if (!btrfs_try_tree_write_lock(b)) {
- btrfs_set_path_blocking(path);
- btrfs_tree_lock(b);
- btrfs_clear_path_blocking(path, b,
- BTRFS_WRITE_LOCK);
- }
- path->locks[level] = BTRFS_WRITE_LOCK;
- path->nodes[level] = b;
- path->slots[level] = 0;
- } else {
- path->slots[level] = 0;
- ret = 0;
- break;
- }
- }
-
-out:
- if (ret)
- btrfs_release_path(path);
-
- return ret;
-}
-
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq)
{
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0d82922179db..d6dd49b51ba8 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -88,12 +88,12 @@ struct btrfs_ordered_sum;
/* holds checksums of all the data extents */
#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
/* holds quota configuration and tracking */
#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+/* for storing balance parameters in the root tree */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
/* orhpan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID -5ULL
@@ -340,6 +340,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
*/
#define BTRFS_FS_STATE_ERROR 0
#define BTRFS_FS_STATE_REMOUNTING 1
+#define BTRFS_FS_STATE_TRANS_ABORTED 2
/* Super block flags */
/* Errors detected */
@@ -508,6 +509,7 @@ struct btrfs_super_block {
#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
@@ -518,7 +520,8 @@ struct btrfs_super_block {
BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
BTRFS_FEATURE_INCOMPAT_RAID56 | \
- BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
/*
* A leaf is full of items. offset and size tell us where to find
@@ -583,7 +586,6 @@ struct btrfs_path {
unsigned int skip_locking:1;
unsigned int leave_spinning:1;
unsigned int search_commit_root:1;
- unsigned int really_keep_locks:1;
};
/*
@@ -1019,9 +1021,9 @@ struct btrfs_block_group_item {
*/
#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
/*
- * SCANNING is set during the initialization phase
+ * RESCAN is set during the initialization phase
*/
-#define BTRFS_QGROUP_STATUS_FLAG_SCANNING (1ULL << 1)
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
/*
* Some qgroup entries are known to be out of date,
* either because the configuration has changed in a way that
@@ -1050,7 +1052,7 @@ struct btrfs_qgroup_status_item {
* only used during scanning to record the progress
* of the scan. It contains a logical address
*/
- __le64 scan;
+ __le64 rescan;
} __attribute__ ((__packed__));
struct btrfs_qgroup_info_item {
@@ -1360,6 +1362,17 @@ struct btrfs_fs_info {
wait_queue_head_t transaction_blocked_wait;
wait_queue_head_t async_submit_wait;
+ /*
+ * Used to protect the incompat_flags, compat_flags, compat_ro_flags
+ * when they are updated.
+ *
+ * Because we do not clear the flags for ever, so we needn't use
+ * the lock on the read side.
+ *
+ * We also needn't use the lock when we mount the fs, because
+ * there is no other task which will update the flag.
+ */
+ spinlock_t super_lock;
struct btrfs_super_block *super_copy;
struct btrfs_super_block *super_for_commit;
struct block_device *__bdev;
@@ -1409,7 +1422,7 @@ struct btrfs_fs_info {
/* this protects tree_mod_seq_list */
spinlock_t tree_mod_seq_lock;
- atomic_t tree_mod_seq;
+ atomic64_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
@@ -1581,12 +1594,20 @@ struct btrfs_fs_info {
struct rb_root qgroup_tree;
spinlock_t qgroup_lock;
+ /* protect user change for quota operations */
+ struct mutex qgroup_ioctl_lock;
+
/* list of dirty qgroups to be written at next commit */
struct list_head dirty_qgroups;
/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
u64 qgroup_seq;
+ /* qgroup rescan items */
+ struct mutex qgroup_rescan_lock; /* protects the progress item */
+ struct btrfs_key qgroup_rescan_progress;
+ struct btrfs_workers qgroup_rescan_workers;
+
/* filesystem state */
unsigned long fs_state;
@@ -1808,6 +1829,12 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_EXTENT_ITEM_KEY 168
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY 169
+
#define BTRFS_TREE_BLOCK_REF_KEY 176
#define BTRFS_EXTENT_DATA_REF_KEY 178
@@ -2766,8 +2793,10 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
{
- int t = btrfs_super_csum_type(s);
- BUG_ON(t >= ARRAY_SIZE(btrfs_csum_sizes));
+ u16 t = btrfs_super_csum_type(s);
+ /*
+ * csum type is validated at mount time
+ */
return btrfs_csum_sizes[t];
}
@@ -2864,8 +2893,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
version, 64);
BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
- scan, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+ rescan, 64);
/* btrfs_qgroup_info_item */
BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3005,7 +3034,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *refs, u64 *flags);
+ u64 offset, int metadata, u64 *refs, u64 *flags);
int btrfs_pin_extent(struct btrfs_root *root,
u64 bytenr, u64 num, int reserved);
int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
@@ -3017,8 +3046,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
struct btrfs_fs_info *info,
u64 bytenr);
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
-u64 btrfs_find_block_group(struct btrfs_root *root,
- u64 search_start, u64 search_hint, int owner);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
@@ -3028,10 +3055,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf,
u64 parent, int last_ref);
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- int level);
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 root_objectid, u64 owner,
@@ -3044,7 +3067,7 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, u64 data);
+ struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3052,7 +3075,7 @@ int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
- int is_data);
+ int level, int is_data);
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
@@ -3084,7 +3107,6 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
@@ -3161,8 +3183,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2);
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *new_key);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -3198,12 +3219,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
u32 data_size);
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
+void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -3243,8 +3261,7 @@ static inline int btrfs_del_item(struct btrfs_trans_handle *trans,
return btrfs_del_items(trans, root, path, path->slots[0], 1);
}
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr);
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
@@ -3264,9 +3281,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
}
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
-int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- int del);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
static inline int btrfs_next_old_item(struct btrfs_root *root,
@@ -3281,7 +3295,6 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
@@ -3318,10 +3331,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
-static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
-{
- return atomic_inc_return(&fs_info->tree_mod_seq);
-}
+u64 btrfs_tree_mod_seq_prev(u64 seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
@@ -3345,9 +3355,8 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *key,
struct btrfs_root_item *item);
-void btrfs_read_root_item(struct btrfs_root *root,
- struct extent_buffer *eb, int slot,
- struct btrfs_root_item *item);
+void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+ struct btrfs_root_item *item);
int btrfs_find_last_root(struct btrfs_root *root, u64 objectid, struct
btrfs_root_item *item, struct btrfs_key *key);
int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid);
@@ -3380,9 +3389,6 @@ struct btrfs_dir_item *
btrfs_search_dir_index_item(struct btrfs_root *root,
struct btrfs_path *path, u64 dirid,
const char *name, int name_len);
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
- struct btrfs_path *path,
- const char *name, int name_len);
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -3460,16 +3466,11 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
u64 bytenr, int mod);
-u64 btrfs_file_extent_length(struct btrfs_path *path);
int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, int cow);
int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u64 isize);
@@ -3531,8 +3532,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
-int btrfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc);
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
struct btrfs_root *new_root, u64 new_dirid);
int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
@@ -3542,7 +3541,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
int btrfs_readpage(struct file *file, struct page *page);
void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int btrfs_dirty_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
int btrfs_drop_inode(struct inode *inode);
@@ -3560,7 +3558,6 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode);
int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
int btrfs_orphan_cleanup(struct btrfs_root *root);
void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -3611,7 +3608,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
-void btrfs_drop_pages(struct page **pages, size_t num_pages);
int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
struct page **pages, size_t num_pages,
loff_t pos, size_t write_bytes,
@@ -3634,14 +3630,31 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
#ifdef CONFIG_PRINTK
__printf(2, 3)
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...);
#else
static inline __printf(2, 3)
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
}
#endif
+#define btrfs_emerg(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_EMERG fmt, ##args)
+#define btrfs_alert(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_ALERT fmt, ##args)
+#define btrfs_crit(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_CRIT fmt, ##args)
+#define btrfs_err(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_ERR fmt, ##args)
+#define btrfs_warn(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_WARNING fmt, ##args)
+#define btrfs_notice(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_NOTICE fmt, ##args)
+#define btrfs_info(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_INFO fmt, ##args)
+#define btrfs_debug(fs_info, fmt, args...) \
+ btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
+
__printf(5, 6)
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
@@ -3663,11 +3676,28 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
disk_super = fs_info->super_copy;
features = btrfs_super_incompat_flags(disk_super);
if (!(features & flag)) {
- features |= flag;
- btrfs_set_super_incompat_flags(disk_super, features);
+ spin_lock(&fs_info->super_lock);
+ features = btrfs_super_incompat_flags(disk_super);
+ if (!(features & flag)) {
+ features |= flag;
+ btrfs_set_super_incompat_flags(disk_super, features);
+ printk(KERN_INFO "btrfs: setting %llu feature flag\n",
+ flag);
+ }
+ spin_unlock(&fs_info->super_lock);
}
}
+#define btrfs_fs_incompat(fs_info, opt) \
+ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+{
+ struct btrfs_super_block *disk_super;
+ disk_super = fs_info->super_copy;
+ return !!(btrfs_super_incompat_flags(disk_super) & flag);
+}
+
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
* detected, that way the exact line number is reported.
@@ -3753,7 +3783,6 @@ void btrfs_scrub_continue_super(struct btrfs_root *root);
int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info,
struct btrfs_device *dev);
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid);
int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
struct btrfs_scrub_progress *progress);
@@ -3784,7 +3813,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0b278b117cbe..f26f38ccd194 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -22,8 +22,9 @@
#include "disk-io.h"
#include "transaction.h"
-#define BTRFS_DELAYED_WRITEBACK 400
-#define BTRFS_DELAYED_BACKGROUND 100
+#define BTRFS_DELAYED_WRITEBACK 512
+#define BTRFS_DELAYED_BACKGROUND 128
+#define BTRFS_DELAYED_BATCH 16
static struct kmem_cache *delayed_node_cache;
@@ -201,7 +202,7 @@ static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
spin_unlock(&root->lock);
}
-struct btrfs_delayed_node *btrfs_first_delayed_node(
+static struct btrfs_delayed_node *btrfs_first_delayed_node(
struct btrfs_delayed_root *delayed_root)
{
struct list_head *p;
@@ -220,7 +221,7 @@ out:
return node;
}
-struct btrfs_delayed_node *btrfs_next_delayed_node(
+static struct btrfs_delayed_node *btrfs_next_delayed_node(
struct btrfs_delayed_node *node)
{
struct btrfs_delayed_root *delayed_root;
@@ -281,7 +282,7 @@ static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
__btrfs_release_delayed_node(node, 0);
}
-struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
+static struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
struct btrfs_delayed_root *delayed_root)
{
struct list_head *p;
@@ -307,7 +308,7 @@ static inline void btrfs_release_prepared_delayed_node(
__btrfs_release_delayed_node(node, 1);
}
-struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
+static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
{
struct btrfs_delayed_item *item;
item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
@@ -382,7 +383,7 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
return NULL;
}
-struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
+static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
struct btrfs_delayed_node *delayed_node,
struct btrfs_key *key)
{
@@ -393,45 +394,6 @@ struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
return item;
}
-struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
- NULL, NULL);
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item, *next;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
- NULL, &next);
- if (!item)
- item = next;
-
- return item;
-}
-
-struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
- struct btrfs_delayed_node *delayed_node,
- struct btrfs_key *key)
-{
- struct btrfs_delayed_item *item, *next;
-
- item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
- NULL, &next);
- if (!item)
- item = next;
-
- return item;
-}
-
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
struct btrfs_delayed_item *ins,
int action)
@@ -494,6 +456,15 @@ static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
BTRFS_DELAYED_DELETION_ITEM);
}
+static void finish_one_item(struct btrfs_delayed_root *delayed_root)
+{
+ int seq = atomic_inc_return(&delayed_root->items_seq);
+ if ((atomic_dec_return(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND || seq % BTRFS_DELAYED_BATCH == 0) &&
+ waitqueue_active(&delayed_root->wait))
+ wake_up(&delayed_root->wait);
+}
+
static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
{
struct rb_root *root;
@@ -512,10 +483,8 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
rb_erase(&delayed_item->rb_node, root);
delayed_item->delayed_node->count--;
- if (atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+
+ finish_one_item(delayed_root);
}
static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
@@ -527,7 +496,7 @@ static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
}
}
-struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
+static struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
struct btrfs_delayed_node *delayed_node)
{
struct rb_node *p;
@@ -540,7 +509,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
return item;
}
-struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
+static struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
struct btrfs_delayed_node *delayed_node)
{
struct rb_node *p;
@@ -553,7 +522,7 @@ struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
return item;
}
-struct btrfs_delayed_item *__btrfs_next_delayed_item(
+static struct btrfs_delayed_item *__btrfs_next_delayed_item(
struct btrfs_delayed_item *item)
{
struct rb_node *p;
@@ -758,10 +727,9 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
* This helper will insert some continuous items into the same leaf according
* to the free space of the leaf.
*/
-static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_delayed_item *item)
+static int btrfs_batch_insert_items(struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_delayed_item *item)
{
struct btrfs_delayed_item *curr, *next;
int free_space;
@@ -840,7 +808,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
btrfs_clear_path_blocking(path, NULL, 0);
/* insert the keys of the items */
- setup_items_for_insert(trans, root, path, keys, data_size,
+ setup_items_for_insert(root, path, keys, data_size,
total_data_size, total_size, nitems);
/* insert the dir index items */
@@ -924,7 +892,7 @@ do_again:
if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
/* insert the continuous items into the same leaf */
path->slots[0]++;
- btrfs_batch_insert_items(trans, root, path, curr);
+ btrfs_batch_insert_items(root, path, curr);
}
btrfs_release_delayed_item(prev);
btrfs_mark_buffer_dirty(path->nodes[0]);
@@ -1056,10 +1024,7 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
delayed_node->count--;
delayed_root = delayed_node->root->fs_info->delayed_root;
- if (atomic_dec_return(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND &&
- waitqueue_active(&delayed_root->wait))
- wake_up(&delayed_root->wait);
+ finish_one_item(delayed_root);
}
}
@@ -1304,35 +1269,44 @@ void btrfs_remove_delayed_node(struct inode *inode)
btrfs_release_delayed_node(delayed_node);
}
-struct btrfs_async_delayed_node {
- struct btrfs_root *root;
- struct btrfs_delayed_node *delayed_node;
+struct btrfs_async_delayed_work {
+ struct btrfs_delayed_root *delayed_root;
+ int nr;
struct btrfs_work work;
};
-static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
+static void btrfs_async_run_delayed_root(struct btrfs_work *work)
{
- struct btrfs_async_delayed_node *async_node;
+ struct btrfs_async_delayed_work *async_work;
+ struct btrfs_delayed_root *delayed_root;
struct btrfs_trans_handle *trans;
struct btrfs_path *path;
struct btrfs_delayed_node *delayed_node = NULL;
struct btrfs_root *root;
struct btrfs_block_rsv *block_rsv;
- int need_requeue = 0;
+ int total_done = 0;
- async_node = container_of(work, struct btrfs_async_delayed_node, work);
+ async_work = container_of(work, struct btrfs_async_delayed_work, work);
+ delayed_root = async_work->delayed_root;
path = btrfs_alloc_path();
if (!path)
goto out;
- path->leave_spinning = 1;
- delayed_node = async_node->delayed_node;
+again:
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND / 2)
+ goto free_path;
+
+ delayed_node = btrfs_first_prepared_delayed_node(delayed_root);
+ if (!delayed_node)
+ goto free_path;
+
+ path->leave_spinning = 1;
root = delayed_node->root;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
- goto free_path;
+ goto release_path;
block_rsv = trans->block_rsv;
trans->block_rsv = &root->fs_info->delayed_block_rsv;
@@ -1363,57 +1337,47 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
* Task1 will sleep until the transaction is commited.
*/
mutex_lock(&delayed_node->mutex);
- if (delayed_node->count)
- need_requeue = 1;
- else
- btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
- delayed_node);
+ btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
mutex_unlock(&delayed_node->mutex);
trans->block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
btrfs_btree_balance_dirty_nodelay(root);
+
+release_path:
+ btrfs_release_path(path);
+ total_done++;
+
+ btrfs_release_prepared_delayed_node(delayed_node);
+ if (async_work->nr == 0 || total_done < async_work->nr)
+ goto again;
+
free_path:
btrfs_free_path(path);
out:
- if (need_requeue)
- btrfs_requeue_work(&async_node->work);
- else {
- btrfs_release_prepared_delayed_node(delayed_node);
- kfree(async_node);
- }
+ wake_up(&delayed_root->wait);
+ kfree(async_work);
}
+
static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
- struct btrfs_root *root, int all)
+ struct btrfs_root *root, int nr)
{
- struct btrfs_async_delayed_node *async_node;
- struct btrfs_delayed_node *curr;
- int count = 0;
+ struct btrfs_async_delayed_work *async_work;
-again:
- curr = btrfs_first_prepared_delayed_node(delayed_root);
- if (!curr)
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return 0;
- async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
- if (!async_node) {
- btrfs_release_prepared_delayed_node(curr);
+ async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
+ if (!async_work)
return -ENOMEM;
- }
- async_node->root = root;
- async_node->delayed_node = curr;
-
- async_node->work.func = btrfs_async_run_delayed_node_done;
- async_node->work.flags = 0;
-
- btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
- count++;
-
- if (all || count < 4)
- goto again;
+ async_work->delayed_root = delayed_root;
+ async_work->work.func = btrfs_async_run_delayed_root;
+ async_work->work.flags = 0;
+ async_work->nr = nr;
+ btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work);
return 0;
}
@@ -1424,30 +1388,55 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
WARN_ON(btrfs_first_delayed_node(delayed_root));
}
+static int refs_newer(struct btrfs_delayed_root *delayed_root,
+ int seq, int count)
+{
+ int val = atomic_read(&delayed_root->items_seq);
+
+ if (val < seq || val >= seq + count)
+ return 1;
+ return 0;
+}
+
void btrfs_balance_delayed_items(struct btrfs_root *root)
{
struct btrfs_delayed_root *delayed_root;
+ int seq;
delayed_root = btrfs_get_delayed_root(root);
if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
return;
+ seq = atomic_read(&delayed_root->items_seq);
+
if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
int ret;
- ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
+ DEFINE_WAIT(__wait);
+
+ ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
if (ret)
return;
- wait_event_interruptible_timeout(
- delayed_root->wait,
- (atomic_read(&delayed_root->items) <
- BTRFS_DELAYED_BACKGROUND),
- HZ);
- return;
+ while (1) {
+ prepare_to_wait(&delayed_root->wait, &__wait,
+ TASK_INTERRUPTIBLE);
+
+ if (refs_newer(delayed_root, seq,
+ BTRFS_DELAYED_BATCH) ||
+ atomic_read(&delayed_root->items) <
+ BTRFS_DELAYED_BACKGROUND) {
+ break;
+ }
+ if (!signal_pending(current))
+ schedule();
+ else
+ break;
+ }
+ finish_wait(&delayed_root->wait, &__wait);
}
- btrfs_wq_run_delayed_node(delayed_root, root, 0);
+ btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
}
/* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 78b6ad0fc669..1d5c5f7abe3e 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -43,6 +43,7 @@ struct btrfs_delayed_root {
*/
struct list_head prepare_list;
atomic_t items; /* for delayed items */
+ atomic_t items_seq; /* for delayed items */
int nodes; /* for delayed nodes */
wait_queue_head_t wait;
};
@@ -86,6 +87,7 @@ static inline void btrfs_init_delayed_root(
struct btrfs_delayed_root *delayed_root)
{
atomic_set(&delayed_root->items, 0);
+ atomic_set(&delayed_root->items_seq, 0);
delayed_root->nodes = 0;
spin_lock_init(&delayed_root->lock);
init_waitqueue_head(&delayed_root->wait);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index b7a0641ead77..c219463fb1fd 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -40,16 +40,19 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep;
* compare two delayed tree backrefs with same bytenr and type
*/
static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
- struct btrfs_delayed_tree_ref *ref1)
+ struct btrfs_delayed_tree_ref *ref1, int type)
{
- if (ref1->root < ref2->root)
- return -1;
- if (ref1->root > ref2->root)
- return 1;
- if (ref1->parent < ref2->parent)
- return -1;
- if (ref1->parent > ref2->parent)
- return 1;
+ if (type == BTRFS_TREE_BLOCK_REF_KEY) {
+ if (ref1->root < ref2->root)
+ return -1;
+ if (ref1->root > ref2->root)
+ return 1;
+ } else {
+ if (ref1->parent < ref2->parent)
+ return -1;
+ if (ref1->parent > ref2->parent)
+ return 1;
+ }
return 0;
}
@@ -113,7 +116,8 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
- btrfs_delayed_node_to_tree_ref(ref1));
+ btrfs_delayed_node_to_tree_ref(ref1),
+ ref1->type);
} else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
@@ -357,8 +361,10 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
- pr_debug("holding back delayed_ref %llu, lowest is "
- "%llu (%p)\n", seq, elem->seq, delayed_refs);
+ pr_debug("holding back delayed_ref %#x.%x, lowest is %#x.%x (%p)\n",
+ (u32)(seq >> 32), (u32)seq,
+ (u32)(elem->seq >> 32), (u32)elem->seq,
+ delayed_refs);
ret = 1;
}
}
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index f75fcaf79aeb..70b962cc177d 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -60,6 +60,7 @@ struct btrfs_delayed_ref_node {
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
u64 flags_to_set;
+ int level;
unsigned int update_key:1;
unsigned int update_flags:1;
unsigned int is_data:1;
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7ba7b3900cb8..65241f32d3f8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -313,6 +313,11 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
struct btrfs_device *tgt_device = NULL;
struct btrfs_device *src_device = NULL;
+ if (btrfs_fs_incompat(fs_info, RAID56)) {
+ pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n");
+ return -EINVAL;
+ }
+
switch (args->start.cont_reading_from_srcdev_mode) {
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 502c2158167c..79e594e341c7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,6 +21,10 @@
#include "hash.h"
#include "transaction.h"
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len);
+
/*
* insert a name into a directory, doing overflow properly if there is a hash
* collision. data_size indicates how big the item inserted should be. On
@@ -49,7 +53,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
di = btrfs_match_dir_item_name(root, path, name, name_len);
if (di)
return ERR_PTR(-EEXIST);
- btrfs_extend_item(trans, root, path, data_size);
+ btrfs_extend_item(root, path, data_size);
} else if (ret < 0)
return ERR_PTR(ret);
WARN_ON(ret > 0);
@@ -379,7 +383,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
* this walks through all the entries in a dir item and finds one
* for a specific name.
*/
-struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
struct btrfs_path *path,
const char *name, int name_len)
{
@@ -442,8 +446,7 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_len - (ptr + sub_item_len - start));
- btrfs_truncate_item(trans, root, path,
- item_len - sub_item_len, 1);
+ btrfs_truncate_item(root, path, item_len - sub_item_len, 1);
}
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02369a3c162e..40c7bc300075 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
+#include <linux/uuid.h>
#include <asm/unaligned.h>
#include "compat.h"
#include "ctree.h"
@@ -62,13 +63,15 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
+static void btrfs_error_commit_super(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -149,7 +152,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
{ .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
{ .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
- { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" },
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
@@ -222,7 +225,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
if (ret == -EEXIST) {
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
@@ -238,7 +241,7 @@ out:
return em;
}
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+u32 btrfs_csum_data(char *data, u32 seed, size_t len)
{
return crc32c(seed, data, len);
}
@@ -274,7 +277,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
- crc = btrfs_csum_data(root, kaddr + offset - map_start,
+ crc = btrfs_csum_data(kaddr + offset - map_start,
crc, cur_len);
len -= cur_len;
offset += cur_len;
@@ -354,6 +357,49 @@ out:
}
/*
+ * Return 0 if the superblock checksum type matches the checksum value of that
+ * algorithm. Pass the raw disk superblock data.
+ */
+static int btrfs_check_super_csum(char *raw_disk_sb)
+{
+ struct btrfs_super_block *disk_sb =
+ (struct btrfs_super_block *)raw_disk_sb;
+ u16 csum_type = btrfs_super_csum_type(disk_sb);
+ int ret = 0;
+
+ if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
+ u32 crc = ~(u32)0;
+ const int csum_size = sizeof(crc);
+ char result[csum_size];
+
+ /*
+ * The super_block structure does not span the whole
+ * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
+ * is filled with zeros and is included in the checkum.
+ */
+ crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
+ crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+ btrfs_csum_final(crc, result);
+
+ if (memcmp(raw_disk_sb, result, csum_size))
+ ret = 1;
+
+ if (ret && btrfs_super_generation(disk_sb) < 10) {
+ printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+ ret = 0;
+ }
+ }
+
+ if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
+ printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+ csum_type);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+/*
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*/
@@ -530,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
- struct page *page, int max_walk)
-{
- struct extent_buffer *eb;
- u64 start = page_offset(page);
- u64 target = start;
- u64 min_start;
-
- if (start < max_walk)
- min_start = 0;
- else
- min_start = start - max_walk;
-
- while (start >= min_start) {
- eb = find_extent_buffer(tree, start, 0);
- if (eb) {
- /*
- * we found an extent buffer and it contains our page
- * horray!
- */
- if (eb->start <= target &&
- eb->start + eb->len > target)
- return eb;
-
- /* we found an extent buffer that wasn't for us */
- free_extent_buffer(eb);
- return NULL;
- }
- if (start == 0)
- break;
- start -= PAGE_CACHE_SIZE;
- }
- return NULL;
-}
-
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror)
{
@@ -613,6 +624,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
goto err;
}
found_level = btrfs_header_level(eb);
+ if (found_level >= BTRFS_MAX_LEVEL) {
+ btrfs_info(root->fs_info, "bad tree block level %d\n",
+ (int)btrfs_header_level(eb));
+ ret = -EIO;
+ goto err;
+ }
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
@@ -636,10 +653,9 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!ret)
set_extent_buffer_uptodate(eb);
err:
- if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
- clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+ if (reads_done &&
+ test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, ret);
- }
if (ret) {
/*
@@ -993,17 +1009,12 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
if (PageWriteback(page) || PageDirty(page))
return 0;
- /*
- * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
- * slab allocation from alloc_extent_state down the callchain where
- * it'd hit a BUG_ON as those flags are not allowed.
- */
- gfp_flags &= ~GFP_SLAB_BUG_MASK;
- return try_release_extent_buffer(page, gfp_flags);
+ return try_release_extent_buffer(page);
}
-static void btree_invalidatepage(struct page *page, unsigned long offset)
+static void btree_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -1275,6 +1286,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ret = 0;
u64 bytenr;
+ uuid_le uuid;
root = btrfs_alloc_root(fs_info);
if (!root)
@@ -1291,6 +1303,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
+ leaf = NULL;
goto fail;
}
@@ -1323,6 +1336,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
+ uuid_le_gen(&uuid);
+ memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0;
key.objectid = objectid;
@@ -1334,11 +1349,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(leaf);
+ return root;
+
fail:
- if (ret)
- return ERR_PTR(ret);
+ if (leaf) {
+ btrfs_tree_unlock(leaf);
+ free_extent_buffer(leaf);
+ }
+ kfree(root);
- return root;
+ return ERR_PTR(ret);
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -1470,7 +1490,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
if (ret == 0) {
l = path->nodes[0];
slot = path->slots[0];
- btrfs_read_root_item(tree_root, l, slot, &root->root_item);
+ btrfs_read_root_item(l, slot, &root->root_item);
memcpy(&root->root_key, location, sizeof(*location));
}
btrfs_free_path(path);
@@ -1485,8 +1505,15 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
+ if (!root->node || !extent_buffer_uptodate(root->node)) {
+ ret = (!root->node) ? -ENOMEM : -EIO;
+
+ free_extent_buffer(root->node);
+ kfree(root);
+ return ERR_PTR(ret);
+ }
+
root->commit_root = btrfs_root_node(root);
- BUG_ON(!root->node); /* -ENOMEM */
out:
if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
@@ -1652,15 +1679,20 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
do {
+ int again = 0;
+
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
- mutex_trylock(&root->fs_info->cleaner_mutex)) {
- btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
- mutex_unlock(&root->fs_info->cleaner_mutex);
+ down_read_trylock(&root->fs_info->sb->s_umount)) {
+ if (mutex_trylock(&root->fs_info->cleaner_mutex)) {
+ btrfs_run_delayed_iputs(root);
+ again = btrfs_clean_one_deleted_snapshot(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+ }
btrfs_run_defrag_inodes(root->fs_info);
+ up_read(&root->fs_info->sb->s_umount);
}
- if (!try_to_freeze()) {
+ if (!try_to_freeze() && !again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
@@ -1929,35 +1961,60 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
return 0;
}
+/* helper to cleanup workers */
+static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
+{
+ btrfs_stop_workers(&fs_info->generic_worker);
+ btrfs_stop_workers(&fs_info->fixup_workers);
+ btrfs_stop_workers(&fs_info->delalloc_workers);
+ btrfs_stop_workers(&fs_info->workers);
+ btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_workers);
+ btrfs_stop_workers(&fs_info->endio_raid56_workers);
+ btrfs_stop_workers(&fs_info->rmw_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_write_workers);
+ btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
+ btrfs_stop_workers(&fs_info->submit_workers);
+ btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
+ btrfs_stop_workers(&fs_info->readahead_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
+ btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
+}
+
/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
free_extent_buffer(info->tree_root->node);
free_extent_buffer(info->tree_root->commit_root);
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
- if (info->quota_root) {
- free_extent_buffer(info->quota_root->node);
- free_extent_buffer(info->quota_root->commit_root);
- }
-
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
+
+ if (info->dev_root) {
+ free_extent_buffer(info->dev_root->node);
+ free_extent_buffer(info->dev_root->commit_root);
+ info->dev_root->node = NULL;
+ info->dev_root->commit_root = NULL;
+ }
+ if (info->extent_root) {
+ free_extent_buffer(info->extent_root->node);
+ free_extent_buffer(info->extent_root->commit_root);
+ info->extent_root->node = NULL;
+ info->extent_root->commit_root = NULL;
+ }
+ if (info->csum_root) {
+ free_extent_buffer(info->csum_root->node);
+ free_extent_buffer(info->csum_root->commit_root);
+ info->csum_root->node = NULL;
+ info->csum_root->commit_root = NULL;
+ }
if (info->quota_root) {
+ free_extent_buffer(info->quota_root->node);
+ free_extent_buffer(info->quota_root->commit_root);
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
-
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
free_extent_buffer(info->chunk_root->commit_root);
@@ -1966,6 +2023,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
}
}
+static void del_fs_roots(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+ struct btrfs_root *gang[8];
+ int i;
+
+ while (!list_empty(&fs_info->dead_roots)) {
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
+
+ if (gang[0]->in_radix) {
+ btrfs_free_fs_root(fs_info, gang[0]);
+ } else {
+ free_extent_buffer(gang[0]->node);
+ free_extent_buffer(gang[0]->commit_root);
+ kfree(gang[0]);
+ }
+ }
+
+ while (1) {
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang));
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++)
+ btrfs_free_fs_root(fs_info, gang[i]);
+ }
+}
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
@@ -2054,6 +2141,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
+ spin_lock_init(&fs_info->super_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
seqlock_init(&fs_info->profiles_lock);
@@ -2077,7 +2165,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
- atomic_set(&fs_info->tree_mod_seq, 0);
+ atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
@@ -2181,11 +2269,13 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->dev_replace.lock);
spin_lock_init(&fs_info->qgroup_lock);
+ mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
+ mutex_init(&fs_info->qgroup_rescan_lock);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2205,12 +2295,31 @@ int open_ctree(struct super_block *sb,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
invalidate_bdev(fs_devices->latest_bdev);
+
+ /*
+ * Read super block and check the signature bytes only
+ */
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh) {
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We want to check superblock checksum, the type is stored inside.
+ * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
+ */
+ if (btrfs_check_super_csum(bh->b_data)) {
+ printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
+ /*
+ * super_copy is zeroed at allocation time and we never touch the
+ * following bytes up to INFO_SIZE, the checksum is calculated from
+ * the whole block of INFO_SIZE
+ */
memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
memcpy(fs_info->super_for_commit, fs_info->super_copy,
sizeof(*fs_info->super_for_commit));
@@ -2218,6 +2327,13 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
+ ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
+ if (ret) {
+ printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
disk_super = fs_info->super_copy;
if (!btrfs_super_root(disk_super))
goto fail_alloc;
@@ -2226,13 +2342,6 @@ int open_ctree(struct super_block *sb,
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
- ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
- if (ret) {
- printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
- err = ret;
- goto fail_alloc;
- }
-
/*
* run through our array of backup supers and setup
* our ring pointer to the oldest one
@@ -2284,6 +2393,9 @@ int open_ctree(struct super_block *sb,
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+ printk(KERN_ERR "btrfs: has skinny extents\n");
+
/*
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
@@ -2313,6 +2425,10 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
+ /*
+ * Needn't use the lock because there is no other task which will
+ * update the flag.
+ */
btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2388,6 +2504,8 @@ int open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->readahead_workers, "readahead",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+ &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@@ -2422,6 +2540,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
ret |= btrfs_start_workers(&fs_info->flush_workers);
+ ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2469,8 +2588,8 @@ int open_ctree(struct super_block *sb,
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
blocksize, generation);
- BUG_ON(!chunk_root->node); /* -ENOMEM */
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+ if (!chunk_root->node ||
+ !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
@@ -2655,6 +2774,13 @@ retry_root_backup:
log_tree_root->node = read_tree_block(tree_root, bytenr,
blocksize,
generation + 1);
+ if (!log_tree_root->node ||
+ !extent_buffer_uptodate(log_tree_root->node)) {
+ printk(KERN_ERR "btrfs: failed to read log tree\n");
+ free_extent_buffer(log_tree_root->node);
+ kfree(log_tree_root);
+ goto fail_trans_kthread;
+ }
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
@@ -2734,6 +2860,8 @@ fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
+ del_fs_roots(fs_info);
+ btrfs_cleanup_transaction(fs_info->tree_root);
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
@@ -2744,6 +2872,7 @@ fail_cleaner:
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
fail_block_groups:
+ btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
fail_tree_roots:
@@ -2751,22 +2880,7 @@ fail_tree_roots:
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_sb_buffer:
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->readahead_workers);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_raid56_workers);
- btrfs_stop_workers(&fs_info->rmw_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
- btrfs_stop_workers(&fs_info->flush_workers);
+ btrfs_stop_all_workers(fs_info);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2898,7 +3012,10 @@ static int write_dev_supers(struct btrfs_device *device,
if (wait) {
bh = __find_get_block(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
- BUG_ON(!bh);
+ if (!bh) {
+ errors++;
+ continue;
+ }
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
errors++;
@@ -2913,7 +3030,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
crc = ~(u32)0;
- crc = btrfs_csum_data(NULL, (char *)sb +
+ crc = btrfs_csum_data((char *)sb +
BTRFS_CSUM_SIZE, crc,
BTRFS_SUPER_INFO_SIZE -
BTRFS_CSUM_SIZE);
@@ -2925,6 +3042,13 @@ static int write_dev_supers(struct btrfs_device *device,
*/
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
+ if (!bh) {
+ printk(KERN_ERR "btrfs: couldn't get super "
+ "buffer head for bytenr %Lu\n", bytenr);
+ errors++;
+ continue;
+ }
+
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
/* one reference for submit_bh */
@@ -3007,7 +3131,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
* caller
*/
device->flush_bio = NULL;
- bio = bio_alloc(GFP_NOFS, 0);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
return -ENOMEM;
@@ -3147,7 +3271,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
return num_tolerated_disk_barrier_failures;
}
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
+static int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
struct list_head *head;
struct btrfs_device *dev;
@@ -3253,7 +3377,7 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
btrfs_free_log(NULL, root);
btrfs_free_log_root_tree(NULL, fs_info);
}
@@ -3277,37 +3401,6 @@ static void free_fs_root(struct btrfs_root *root)
kfree(root);
}
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
-{
- int ret;
- struct btrfs_root *gang[8];
- int i;
-
- while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
- list_del(&gang[0]->root_list);
-
- if (gang[0]->in_radix) {
- btrfs_free_fs_root(fs_info, gang[0]);
- } else {
- free_extent_buffer(gang[0]->node);
- free_extent_buffer(gang[0]->commit_root);
- kfree(gang[0]);
- }
- }
-
- while (1) {
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
- (void **)gang, 0,
- ARRAY_SIZE(gang));
- if (!ret)
- break;
- for (i = 0; i < ret; i++)
- btrfs_free_fs_root(fs_info, gang[i]);
- }
-}
-
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
@@ -3343,8 +3436,8 @@ int btrfs_commit_super(struct btrfs_root *root)
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
+ wake_up_process(root->fs_info->cleaner_kthread);
/* wait until ongoing cleanup work done */
down_write(&root->fs_info->cleanup_work_sem);
@@ -3420,20 +3513,7 @@ int close_ctree(struct btrfs_root *root)
percpu_counter_sum(&fs_info->delalloc_bytes));
}
- free_extent_buffer(fs_info->extent_root->node);
- free_extent_buffer(fs_info->extent_root->commit_root);
- free_extent_buffer(fs_info->tree_root->node);
- free_extent_buffer(fs_info->tree_root->commit_root);
- free_extent_buffer(fs_info->chunk_root->node);
- free_extent_buffer(fs_info->chunk_root->commit_root);
- free_extent_buffer(fs_info->dev_root->node);
- free_extent_buffer(fs_info->dev_root->commit_root);
- free_extent_buffer(fs_info->csum_root->node);
- free_extent_buffer(fs_info->csum_root->commit_root);
- if (fs_info->quota_root) {
- free_extent_buffer(fs_info->quota_root->node);
- free_extent_buffer(fs_info->quota_root->commit_root);
- }
+ free_root_pointers(fs_info, 1);
btrfs_free_block_groups(fs_info);
@@ -3441,22 +3521,7 @@ int close_ctree(struct btrfs_root *root)
iput(fs_info->btree_inode);
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_raid56_workers);
- btrfs_stop_workers(&fs_info->rmw_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
- btrfs_stop_workers(&fs_info->readahead_workers);
- btrfs_stop_workers(&fs_info->flush_workers);
+ btrfs_stop_all_workers(fs_info);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3561,18 +3626,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
- if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
- printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
- return -EINVAL;
- }
-
- if (read_only)
- return 0;
-
+ /*
+ * Placeholder for checks
+ */
return 0;
}
-void btrfs_error_commit_super(struct btrfs_root *root)
+static void btrfs_error_commit_super(struct btrfs_root *root)
{
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
@@ -3602,8 +3662,11 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
ordered_operations);
list_del_init(&btrfs_inode->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_extent_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->fs_info->ordered_extent_lock);
}
spin_unlock(&root->fs_info->ordered_extent_lock);
@@ -3663,6 +3726,9 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
continue;
}
+ if (head->must_insert_reserved)
+ btrfs_pin_extent(root, ref->bytenr,
+ ref->num_bytes, 1);
btrfs_free_delayed_extent_op(head->extent_op);
delayed_refs->num_heads--;
if (list_empty(&head->cluster))
@@ -3687,7 +3753,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
{
struct btrfs_pending_snapshot *snapshot;
struct list_head splice;
@@ -3700,10 +3766,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
snapshot = list_entry(splice.next,
struct btrfs_pending_snapshot,
list);
-
+ snapshot->error = -ECANCELED;
list_del_init(&snapshot->list);
-
- kfree(snapshot);
}
}
@@ -3724,8 +3788,11 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_del_init(&btrfs_inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&btrfs_inode->runtime_flags);
+ spin_unlock(&root->fs_info->delalloc_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
@@ -3736,13 +3803,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
int mark)
{
int ret;
- struct page *page;
- struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
u64 start = 0;
u64 end;
- u64 offset;
- unsigned long index;
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
@@ -3752,36 +3815,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
- index = start >> PAGE_CACHE_SHIFT;
- start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
- page = find_get_page(btree_inode->i_mapping, index);
- if (!page)
+ eb = btrfs_find_tree_block(root, start,
+ root->leafsize);
+ start += root->leafsize;
+ if (!eb)
continue;
- offset = page_offset(page);
-
- spin_lock(&dirty_pages->buffer_lock);
- eb = radix_tree_lookup(
- &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
- offset >> PAGE_CACHE_SHIFT);
- spin_unlock(&dirty_pages->buffer_lock);
- if (eb)
- ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
- &eb->bflags);
- if (PageWriteback(page))
- end_page_writeback(page);
-
- lock_page(page);
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- spin_lock_irq(&page->mapping->tree_lock);
- radix_tree_tag_clear(&page->mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irq(&page->mapping->tree_lock);
- }
+ wait_on_extent_buffer_writeback(eb);
- unlock_page(page);
- page_cache_release(page);
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+ &eb->bflags))
+ clear_extent_buffer_dirty(eb);
+ free_extent_buffer_stale(eb);
}
}
@@ -3840,6 +3884,8 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->blocked = 1;
wake_up(&root->fs_info->transaction_blocked_wait);
+ btrfs_evict_pending_snapshots(cur_trans);
+
cur_trans->blocked = 0;
wake_up(&root->fs_info->transaction_wait);
@@ -3849,8 +3895,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
- btrfs_destroy_pending_snapshots(cur_trans);
-
btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(root,
@@ -3862,7 +3906,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
*/
}
-int btrfs_cleanup_transaction(struct btrfs_root *root)
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
LIST_HEAD(list);
@@ -3883,10 +3927,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
btrfs_destroy_delayed_refs(t, root);
- btrfs_block_rsv_release(root,
- &root->fs_info->trans_block_rsv,
- t->dirty_pages.dirty_bytes);
-
/* FIXME: cleanup wait for commit */
t->in_commit = 1;
t->blocked = 1;
@@ -3894,6 +3934,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
wake_up(&root->fs_info->transaction_blocked_wait);
+ btrfs_evict_pending_snapshots(t);
+
t->blocked = 0;
smp_mb();
if (waitqueue_active(&root->fs_info->transaction_wait))
@@ -3907,8 +3949,6 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
- btrfs_destroy_pending_snapshots(t);
-
btrfs_destroy_delalloc_inodes(root);
spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 034d7dc552b2..be69ce1b07a2 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -61,7 +61,6 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
-void btrfs_error_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
@@ -77,7 +76,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
+u32 btrfs_csum_data(char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata);
@@ -93,10 +92,8 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
-int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
-void btrfs_abort_devices(struct btrfs_root *root);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3e074dab2d57..df472ab1b5ac 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -105,6 +105,8 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int reserve);
static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
u64 num_bytes);
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -257,7 +259,8 @@ static int exclude_super_stripes(struct btrfs_root *root,
cache->bytes_super += stripe_len;
ret = add_excluded_extent(root, cache->key.objectid,
stripe_len);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
}
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
@@ -265,13 +268,35 @@ static int exclude_super_stripes(struct btrfs_root *root,
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ return ret;
while (nr--) {
- cache->bytes_super += stripe_len;
- ret = add_excluded_extent(root, logical[nr],
- stripe_len);
- BUG_ON(ret); /* -ENOMEM */
+ u64 start, len;
+
+ if (logical[nr] > cache->key.objectid +
+ cache->key.offset)
+ continue;
+
+ if (logical[nr] + stripe_len <= cache->key.objectid)
+ continue;
+
+ start = logical[nr];
+ if (start < cache->key.objectid) {
+ start = cache->key.objectid;
+ len = (logical[nr] + stripe_len) - start;
+ } else {
+ len = min_t(u64, stripe_len,
+ cache->key.objectid +
+ cache->key.offset - start);
+ }
+
+ cache->bytes_super += len;
+ ret = add_excluded_extent(root, start, len);
+ if (ret) {
+ kfree(logical);
+ return ret;
+ }
}
kfree(logical);
@@ -414,8 +439,7 @@ again:
if (ret)
break;
- if (need_resched() ||
- btrfs_next_leaf(extent_root, path)) {
+ if (need_resched()) {
caching_ctl->progress = last;
btrfs_release_path(path);
up_read(&fs_info->extent_commit_sem);
@@ -423,6 +447,12 @@ again:
cond_resched();
goto again;
}
+
+ ret = btrfs_next_leaf(extent_root, path);
+ if (ret < 0)
+ goto err;
+ if (ret)
+ break;
leaf = path->nodes[0];
nritems = btrfs_header_nritems(leaf);
continue;
@@ -437,11 +467,16 @@ again:
block_group->key.offset)
break;
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY) {
total_found += add_new_free_space(block_group,
fs_info, last,
key.objectid);
- last = key.objectid + key.offset;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ last = key.objectid +
+ fs_info->tree_root->leafsize;
+ else
+ last = key.objectid + key.offset;
if (total_found > (1024 * 1024 * 2)) {
total_found = 0;
@@ -651,55 +686,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
rcu_read_unlock();
}
-u64 btrfs_find_block_group(struct btrfs_root *root,
- u64 search_start, u64 search_hint, int owner)
-{
- struct btrfs_block_group_cache *cache;
- u64 used;
- u64 last = max(search_hint, search_start);
- u64 group_start = 0;
- int full_search = 0;
- int factor = 9;
- int wrapped = 0;
-again:
- while (1) {
- cache = btrfs_lookup_first_block_group(root->fs_info, last);
- if (!cache)
- break;
-
- spin_lock(&cache->lock);
- last = cache->key.objectid + cache->key.offset;
- used = btrfs_block_group_used(&cache->item);
-
- if ((full_search || !cache->ro) &&
- block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
- if (used + cache->pinned + cache->reserved <
- div_factor(cache->key.offset, factor)) {
- group_start = cache->key.objectid;
- spin_unlock(&cache->lock);
- btrfs_put_block_group(cache);
- goto found;
- }
- }
- spin_unlock(&cache->lock);
- btrfs_put_block_group(cache);
- cond_resched();
- }
- if (!wrapped) {
- last = search_start;
- wrapped = 1;
- goto again;
- }
- if (!full_search && factor < 10) {
- last = search_start;
- full_search = 1;
- factor = 10;
- goto again;
- }
-found:
- return group_start;
-}
-
/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
@@ -713,15 +699,21 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
key.objectid = start;
key.offset = len;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
0, 0);
+ if (ret > 0) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid == start &&
+ key.type == BTRFS_METADATA_ITEM_KEY)
+ ret = 0;
+ }
btrfs_free_path(path);
return ret;
}
/*
- * helper function to lookup reference count and flags of extent.
+ * helper function to lookup reference count and flags of a tree block.
*
* the head node for delayed ref is used to store the sum of all the
* reference count modifications queued up in the rbtree. the head
@@ -731,7 +723,7 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
*/
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *refs, u64 *flags)
+ u64 offset, int metadata, u64 *refs, u64 *flags)
{
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -744,13 +736,29 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
u64 extent_flags;
int ret;
+ /*
+ * If we don't have skinny metadata, don't bother doing anything
+ * different
+ */
+ if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA)) {
+ offset = root->leafsize;
+ metadata = 0;
+ }
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- key.objectid = bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = num_bytes;
+ if (metadata) {
+ key.objectid = bytenr;
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = offset;
+ } else {
+ key.objectid = bytenr;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = offset;
+ }
+
if (!trans) {
path->skip_locking = 1;
path->search_commit_root = 1;
@@ -761,6 +769,13 @@ again:
if (ret < 0)
goto out_free;
+ if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = root->leafsize;
+ btrfs_release_path(path);
+ goto again;
+ }
+
if (ret == 0) {
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -996,7 +1011,7 @@ static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
return ret;
BUG_ON(ret); /* Corruption */
- btrfs_extend_item(trans, root, path, new_size);
+ btrfs_extend_item(root, path, new_size);
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
@@ -1448,6 +1463,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
int want;
int ret;
int err = 0;
+ bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+ SKINNY_METADATA);
key.objectid = bytenr;
key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1459,16 +1476,54 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
path->keep_locks = 1;
} else
extra_size = -1;
+
+ /*
+ * Owner is our parent level, so we can just add one to get the level
+ * for the block we are interested in.
+ */
+ if (skinny_metadata && owner < BTRFS_FIRST_FREE_OBJECTID) {
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = owner;
+ }
+
+again:
ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
if (ret < 0) {
err = ret;
goto out;
}
+
+ /*
+ * We may be a newly converted file system which still has the old fat
+ * extent entries for metadata, so try and see if we have one of those.
+ */
+ if (ret > 0 && skinny_metadata) {
+ skinny_metadata = false;
+ if (path->slots[0]) {
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == num_bytes)
+ ret = 0;
+ }
+ if (ret) {
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
if (ret && !insert) {
err = -ENOENT;
goto out;
+ } else if (ret) {
+ err = -EIO;
+ WARN_ON(1);
+ goto out;
}
- BUG_ON(ret); /* Corruption */
leaf = path->nodes[0];
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
@@ -1496,11 +1551,9 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
ptr = (unsigned long)(ei + 1);
end = (unsigned long)ei + item_size;
- if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+ if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
ptr += sizeof(struct btrfs_tree_block_info);
BUG_ON(ptr > end);
- } else {
- BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
}
err = -ENOENT;
@@ -1582,8 +1635,7 @@ out:
* helper to add new inline back ref
*/
static noinline_for_stack
-void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+void setup_inline_extent_backref(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
u64 parent, u64 root_objectid,
@@ -1606,7 +1658,7 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
type = extent_ref_type(parent, owner);
size = btrfs_extent_inline_ref_size(type);
- btrfs_extend_item(trans, root, path, size);
+ btrfs_extend_item(root, path, size);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
@@ -1675,8 +1727,7 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
* helper to update/remove inline back ref
*/
static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+void update_inline_extent_backref(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
@@ -1732,7 +1783,7 @@ void update_inline_extent_backref(struct btrfs_trans_handle *trans,
memmove_extent_buffer(leaf, ptr, ptr + size,
end - ptr - size);
item_size -= size;
- btrfs_truncate_item(trans, root, path, item_size, 1);
+ btrfs_truncate_item(root, path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
}
@@ -1754,10 +1805,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
root_objectid, owner, offset, 1);
if (ret == 0) {
BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
- update_inline_extent_backref(trans, root, path, iref,
+ update_inline_extent_backref(root, path, iref,
refs_to_add, extent_op);
} else if (ret == -ENOENT) {
- setup_inline_extent_backref(trans, root, path, iref, parent,
+ setup_inline_extent_backref(root, path, iref, parent,
root_objectid, owner, offset,
refs_to_add, extent_op);
ret = 0;
@@ -1794,7 +1845,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref) {
- update_inline_extent_backref(trans, root, path, iref,
+ update_inline_extent_backref(root, path, iref,
-refs_to_drop, NULL);
} else if (is_data) {
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
@@ -1965,10 +2016,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
ref_root = ref->root;
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
- if (extent_op) {
- BUG_ON(extent_op->update_key);
+ if (extent_op)
flags |= extent_op->flags_to_set;
- }
ret = alloc_reserved_file_extent(trans, root,
parent, ref_root, flags,
ref->objectid, ref->offset,
@@ -2021,18 +2070,29 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
u32 item_size;
int ret;
int err = 0;
+ int metadata = !extent_op->is_data;
if (trans->aborted)
return 0;
+ if (metadata && !btrfs_fs_incompat(root->fs_info, SKINNY_METADATA))
+ metadata = 0;
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
key.objectid = node->bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = node->num_bytes;
+ if (metadata) {
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = extent_op->level;
+ } else {
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = node->num_bytes;
+ }
+
+again:
path->reada = 1;
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
@@ -2042,6 +2102,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
goto out;
}
if (ret > 0) {
+ if (metadata) {
+ btrfs_release_path(path);
+ metadata = 0;
+
+ key.offset = node->num_bytes;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ goto again;
+ }
err = -EIO;
goto out;
}
@@ -2081,10 +2149,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_key ins;
u64 parent = 0;
u64 ref_root = 0;
-
- ins.objectid = node->bytenr;
- ins.offset = node->num_bytes;
- ins.type = BTRFS_EXTENT_ITEM_KEY;
+ bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+ SKINNY_METADATA);
ref = btrfs_delayed_node_to_tree_ref(node);
if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
@@ -2092,10 +2158,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
else
ref_root = ref->root;
+ ins.objectid = node->bytenr;
+ if (skinny_metadata) {
+ ins.offset = ref->level;
+ ins.type = BTRFS_METADATA_ITEM_KEY;
+ } else {
+ ins.offset = node->num_bytes;
+ ins.type = BTRFS_EXTENT_ITEM_KEY;
+ }
+
BUG_ON(node->ref_mod != 1);
if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
- BUG_ON(!extent_op || !extent_op->update_flags ||
- !extent_op->update_key);
+ BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, root,
parent, ref_root,
extent_op->flags_to_set,
@@ -2299,9 +2373,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
btrfs_free_delayed_extent_op(extent_op);
if (ret) {
- printk(KERN_DEBUG
- "btrfs: run_delayed_extent_op "
- "returned %d\n", ret);
+ btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
spin_lock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
@@ -2340,8 +2412,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_delayed_ref_unlock(locked_ref);
btrfs_put_delayed_ref(ref);
- printk(KERN_DEBUG
- "btrfs: run_one_delayed_ref returned %d\n", ret);
+ btrfs_debug(fs_info, "run_one_delayed_ref returned %d", ret);
spin_lock(&delayed_refs->lock);
return ret;
}
@@ -2418,9 +2489,11 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
if (list_empty(&trans->qgroup_ref_list) !=
!trans->delayed_ref_elem.seq) {
/* list without seq or seq without list */
- printk(KERN_ERR "btrfs: qgroup accounting update error, list is%s empty, seq is %llu\n",
+ btrfs_err(fs_info,
+ "qgroup accounting update error, list is%s empty, seq is %#x.%x",
list_empty(&trans->qgroup_ref_list) ? "" : " not",
- trans->delayed_ref_elem.seq);
+ (u32)(trans->delayed_ref_elem.seq >> 32),
+ (u32)trans->delayed_ref_elem.seq);
BUG();
}
@@ -2642,7 +2715,7 @@ out:
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
- int is_data)
+ int level, int is_data)
{
struct btrfs_delayed_extent_op *extent_op;
int ret;
@@ -2655,6 +2728,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->update_flags = 1;
extent_op->update_key = 0;
extent_op->is_data = is_data ? 1 : 0;
+ extent_op->level = level;
ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
num_bytes, extent_op);
@@ -3032,6 +3106,11 @@ again:
WARN_ON(ret);
if (i_size_read(inode) > 0) {
+ ret = btrfs_check_trunc_cache_free_space(root,
+ &root->fs_info->global_block_rsv);
+ if (ret)
+ goto out_put;
+
ret = btrfs_truncate_free_space_cache(root, trans, path,
inode);
if (ret)
@@ -3329,7 +3408,7 @@ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags)
* progress (either running or paused) picks the target profile (if it's
* already available), otherwise falls back to plain reducing.
*/
-u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
/*
* we add in the count of missing devices because we want
@@ -3549,6 +3628,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
rcu_read_unlock();
}
+static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
+{
+ return (global->size << 1);
+}
+
static int should_alloc_chunk(struct btrfs_root *root,
struct btrfs_space_info *sinfo, int force)
{
@@ -3566,7 +3650,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
* global_rsv, it doesn't change except when the transaction commits.
*/
if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
- num_allocated += global_rsv->size;
+ num_allocated += calc_global_rsv_need_space(global_rsv);
/*
* in limited mode, we want to have some free space up to
@@ -3619,8 +3703,8 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
thresh = get_system_chunk_thresh(root, type);
if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
- printk(KERN_INFO "left=%llu, need=%llu, flags=%llu\n",
- left, thresh, type);
+ btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
+ left, thresh, type);
dump_space_info(info, 0, 0);
}
@@ -3738,7 +3822,7 @@ static int can_overcommit(struct btrfs_root *root,
{
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
u64 profile = btrfs_get_alloc_profile(root, 0);
- u64 rsv_size = 0;
+ u64 space_size;
u64 avail;
u64 used;
u64 to_add;
@@ -3746,18 +3830,16 @@ static int can_overcommit(struct btrfs_root *root,
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_pinned + space_info->bytes_readonly;
- spin_lock(&global_rsv->lock);
- rsv_size = global_rsv->size;
- spin_unlock(&global_rsv->lock);
-
/*
* We only want to allow over committing if we have lots of actual space
* free, but if we don't have enough space to handle the global reserve
* space then we could end up having a real enospc problem when trying
* to allocate a chunk or some other such important allocation.
*/
- rsv_size <<= 1;
- if (used + rsv_size >= space_info->total_bytes)
+ spin_lock(&global_rsv->lock);
+ space_size = calc_global_rsv_need_space(global_rsv);
+ spin_unlock(&global_rsv->lock);
+ if (used + space_size >= space_info->total_bytes)
return 0;
used += space_info->bytes_may_use;
@@ -3800,8 +3882,8 @@ static int can_overcommit(struct btrfs_root *root,
return 0;
}
-void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
- unsigned long nr_pages)
+static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
+ unsigned long nr_pages)
{
struct super_block *sb = root->fs_info->sb;
int started;
@@ -3818,7 +3900,8 @@ void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
* the disk).
*/
btrfs_start_delalloc_inodes(root, 0);
- btrfs_wait_ordered_extents(root, 0);
+ if (!current->journal_info)
+ btrfs_wait_ordered_extents(root, 0);
}
}
@@ -4435,7 +4518,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
- block_rsv->size = num_bytes;
+ block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
sinfo->bytes_reserved + sinfo->bytes_readonly +
@@ -4481,6 +4564,8 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
+ if (fs_info->quota_root)
+ fs_info->quota_root->block_rsv = &fs_info->global_block_rsv;
fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;
update_global_block_rsv(fs_info);
@@ -4790,14 +4875,49 @@ out_fail:
* If the inodes csum_bytes is the same as the original
* csum_bytes then we know we haven't raced with any free()ers
* so we can just reduce our inodes csum bytes and carry on.
- * Otherwise we have to do the normal free thing to account for
- * the case that the free side didn't free up its reserve
- * because of this outstanding reservation.
*/
- if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+ if (BTRFS_I(inode)->csum_bytes == csum_bytes) {
calc_csum_metadata_size(inode, num_bytes, 0);
- else
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+ } else {
+ u64 orig_csum_bytes = BTRFS_I(inode)->csum_bytes;
+ u64 bytes;
+
+ /*
+ * This is tricky, but first we need to figure out how much we
+ * free'd from any free-ers that occured during this
+ * reservation, so we reset ->csum_bytes to the csum_bytes
+ * before we dropped our lock, and then call the free for the
+ * number of bytes that were freed while we were trying our
+ * reservation.
+ */
+ bytes = csum_bytes - BTRFS_I(inode)->csum_bytes;
+ BTRFS_I(inode)->csum_bytes = csum_bytes;
+ to_free = calc_csum_metadata_size(inode, bytes, 0);
+
+
+ /*
+ * Now we need to see how much we would have freed had we not
+ * been making this reservation and our ->csum_bytes were not
+ * artificially inflated.
+ */
+ BTRFS_I(inode)->csum_bytes = csum_bytes - num_bytes;
+ bytes = csum_bytes - orig_csum_bytes;
+ bytes = calc_csum_metadata_size(inode, bytes, 0);
+
+ /*
+ * Now reset ->csum_bytes to what it should be. If bytes is
+ * more than to_free then we would have free'd more space had we
+ * not had an artificially high ->csum_bytes, so we need to free
+ * the remainder. If bytes is the same or less then we don't
+ * need to do anything, the other free-ers did the correct
+ * thing.
+ */
+ BTRFS_I(inode)->csum_bytes = orig_csum_bytes - num_bytes;
+ if (bytes > to_free)
+ to_free = bytes - to_free;
+ else
+ to_free = 0;
+ }
spin_unlock(&BTRFS_I(inode)->lock);
if (dropped)
to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -5047,9 +5167,11 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
u64 bytenr, u64 num_bytes)
{
struct btrfs_block_group_cache *cache;
+ int ret;
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
- BUG_ON(!cache); /* Logic error */
+ if (!cache)
+ return -EINVAL;
/*
* pull in the free space cache (if any) so that our pin
@@ -5062,9 +5184,9 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
pin_down_extent(root, cache, bytenr, num_bytes, 0);
/* remove us from the free space cache (if we're there at all) */
- btrfs_remove_free_space(cache, bytenr, num_bytes);
+ ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
btrfs_put_block_group(cache);
- return 0;
+ return ret;
}
/**
@@ -5269,6 +5391,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
int num_to_del = 1;
u32 item_size;
u64 refs;
+ bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+ SKINNY_METADATA);
path = btrfs_alloc_path();
if (!path)
@@ -5280,6 +5404,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
BUG_ON(!is_data && refs_to_drop != 1);
+ if (is_data)
+ skinny_metadata = 0;
+
ret = lookup_extent_backref(trans, extent_root, path, &iref,
bytenr, num_bytes, parent,
root_objectid, owner_objectid,
@@ -5296,6 +5423,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
found_extent = 1;
break;
}
+ if (key.type == BTRFS_METADATA_ITEM_KEY &&
+ key.offset == owner_objectid) {
+ found_extent = 1;
+ break;
+ }
if (path->slots[0] - extent_slot > 5)
break;
extent_slot--;
@@ -5321,12 +5453,39 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = num_bytes;
+ if (!is_data && skinny_metadata) {
+ key.type = BTRFS_METADATA_ITEM_KEY;
+ key.offset = owner_objectid;
+ }
+
ret = btrfs_search_slot(trans, extent_root,
&key, path, -1, 1);
+ if (ret > 0 && skinny_metadata && path->slots[0]) {
+ /*
+ * Couldn't find our skinny metadata item,
+ * see if we have ye olde extent item.
+ */
+ path->slots[0]--;
+ btrfs_item_key_to_cpu(path->nodes[0], &key,
+ path->slots[0]);
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_EXTENT_ITEM_KEY &&
+ key.offset == num_bytes)
+ ret = 0;
+ }
+
+ if (ret > 0 && skinny_metadata) {
+ skinny_metadata = false;
+ key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.offset = num_bytes;
+ btrfs_release_path(path);
+ ret = btrfs_search_slot(trans, extent_root,
+ &key, path, -1, 1);
+ }
+
if (ret) {
- printk(KERN_ERR "umm, got %d back from search"
- ", was looking for %llu\n", ret,
- (unsigned long long)bytenr);
+ btrfs_err(info, "umm, got %d back from search, was looking for %llu",
+ ret, (unsigned long long)bytenr);
if (ret > 0)
btrfs_print_leaf(extent_root,
path->nodes[0]);
@@ -5340,13 +5499,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
} else if (ret == -ENOENT) {
btrfs_print_leaf(extent_root, path->nodes[0]);
WARN_ON(1);
- printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
- "parent %llu root %llu owner %llu offset %llu\n",
- (unsigned long long)bytenr,
- (unsigned long long)parent,
- (unsigned long long)root_objectid,
- (unsigned long long)owner_objectid,
- (unsigned long long)owner_offset);
+ btrfs_err(info,
+ "unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
+ (unsigned long long)bytenr,
+ (unsigned long long)parent,
+ (unsigned long long)root_objectid,
+ (unsigned long long)owner_objectid,
+ (unsigned long long)owner_offset);
} else {
btrfs_abort_transaction(trans, extent_root, ret);
goto out;
@@ -5374,9 +5533,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, extent_root, &key, path,
-1, 1);
if (ret) {
- printk(KERN_ERR "umm, got %d back from search"
- ", was looking for %llu\n", ret,
- (unsigned long long)bytenr);
+ btrfs_err(info, "umm, got %d back from search, was looking for %llu",
+ ret, (unsigned long long)bytenr);
btrfs_print_leaf(extent_root, path->nodes[0]);
}
if (ret < 0) {
@@ -5392,7 +5550,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
BUG_ON(item_size < sizeof(*ei));
ei = btrfs_item_ptr(leaf, extent_slot,
struct btrfs_extent_item);
- if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
+ key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -5400,7 +5559,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
refs = btrfs_extent_refs(leaf, ei);
- BUG_ON(refs < refs_to_drop);
+ if (refs < refs_to_drop) {
+ btrfs_err(info, "trying to drop %d refs but we only have %Lu "
+ "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+ ret = -EINVAL;
+ btrfs_abort_transaction(trans, extent_root, ret);
+ goto out;
+ }
refs -= refs_to_drop;
if (refs > 0) {
@@ -5715,7 +5880,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *orig_root,
u64 num_bytes, u64 empty_size,
u64 hint_byte, struct btrfs_key *ins,
- u64 data)
+ u64 flags)
{
int ret = 0;
struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -5726,8 +5891,8 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
int empty_cluster = 2 * 1024 * 1024;
struct btrfs_space_info *space_info;
int loop = 0;
- int index = __get_raid_index(data);
- int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
+ int index = __get_raid_index(flags);
+ int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ?
RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
bool found_uncached_bg = false;
bool failed_cluster_refill = false;
@@ -5740,11 +5905,11 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
ins->objectid = 0;
ins->offset = 0;
- trace_find_free_extent(orig_root, num_bytes, empty_size, data);
+ trace_find_free_extent(orig_root, num_bytes, empty_size, flags);
- space_info = __find_space_info(root->fs_info, data);
+ space_info = __find_space_info(root->fs_info, flags);
if (!space_info) {
- printk(KERN_ERR "No space info for %llu\n", data);
+ btrfs_err(root->fs_info, "No space info for %llu", flags);
return -ENOSPC;
}
@@ -5755,13 +5920,13 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
if (btrfs_mixed_space_info(space_info))
use_cluster = false;
- if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
+ if (flags & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
last_ptr = &root->fs_info->meta_alloc_cluster;
if (!btrfs_test_opt(root, SSD))
empty_cluster = 64 * 1024;
}
- if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+ if ((flags & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
btrfs_test_opt(root, SSD)) {
last_ptr = &root->fs_info->data_alloc_cluster;
}
@@ -5790,7 +5955,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
* However if we are re-searching with an ideal block group
* picked out then we don't care that the block group is cached.
*/
- if (block_group && block_group_bits(block_group, data) &&
+ if (block_group && block_group_bits(block_group, flags) &&
block_group->cached != BTRFS_CACHE_NO) {
down_read(&space_info->groups_sem);
if (list_empty(&block_group->list) ||
@@ -5828,7 +5993,7 @@ search:
* raid types, but we want to make sure we only allocate
* for the proper type.
*/
- if (!block_group_bits(block_group, data)) {
+ if (!block_group_bits(block_group, flags)) {
u64 extra = BTRFS_BLOCK_GROUP_DUP |
BTRFS_BLOCK_GROUP_RAID1 |
BTRFS_BLOCK_GROUP_RAID5 |
@@ -5840,7 +6005,7 @@ search:
* doesn't provide them, bail. This does allow us to
* fill raid0 from raid1.
*/
- if ((data & extra) && !(block_group->flags & extra))
+ if ((flags & extra) && !(block_group->flags & extra))
goto loop;
}
@@ -5871,7 +6036,7 @@ have_block_group:
if (used_block_group != block_group &&
(!used_block_group ||
used_block_group->ro ||
- !block_group_bits(used_block_group, data))) {
+ !block_group_bits(used_block_group, flags))) {
used_block_group = block_group;
goto refill_cluster;
}
@@ -6067,7 +6232,7 @@ loop:
index = 0;
loop++;
if (loop == LOOP_ALLOC_CHUNK) {
- ret = do_chunk_alloc(trans, root, data,
+ ret = do_chunk_alloc(trans, root, flags,
CHUNK_ALLOC_FORCE);
/*
* Do not bail out on ENOSPC since we
@@ -6145,16 +6310,17 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 num_bytes, u64 min_alloc_size,
u64 empty_size, u64 hint_byte,
- struct btrfs_key *ins, u64 data)
+ struct btrfs_key *ins, int is_data)
{
bool final_tried = false;
+ u64 flags;
int ret;
- data = btrfs_get_alloc_profile(root, data);
+ flags = btrfs_get_alloc_profile(root, is_data);
again:
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
- hint_byte, ins, data);
+ hint_byte, ins, flags);
if (ret == -ENOSPC) {
if (!final_tried) {
@@ -6167,10 +6333,10 @@ again:
} else if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
struct btrfs_space_info *sinfo;
- sinfo = __find_space_info(root->fs_info, data);
- printk(KERN_ERR "btrfs allocation failed flags %llu, "
- "wanted %llu\n", (unsigned long long)data,
- (unsigned long long)num_bytes);
+ sinfo = __find_space_info(root->fs_info, flags);
+ btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
+ (unsigned long long)flags,
+ (unsigned long long)num_bytes);
if (sinfo)
dump_space_info(sinfo, num_bytes, 1);
}
@@ -6189,8 +6355,8 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
cache = btrfs_lookup_block_group(root->fs_info, start);
if (!cache) {
- printk(KERN_ERR "Unable to find block group for %llu\n",
- (unsigned long long)start);
+ btrfs_err(root->fs_info, "Unable to find block group for %llu",
+ (unsigned long long)start);
return -ENOSPC;
}
@@ -6285,9 +6451,9 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
ret = update_block_group(root, ins->objectid, ins->offset, 1);
if (ret) { /* -ENOENT, logic error */
- printk(KERN_ERR "btrfs update block group failed for %llu "
- "%llu\n", (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
+ btrfs_err(fs_info, "update block group failed for %llu %llu",
+ (unsigned long long)ins->objectid,
+ (unsigned long long)ins->offset);
BUG();
}
return ret;
@@ -6306,7 +6472,12 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_extent_inline_ref *iref;
struct btrfs_path *path;
struct extent_buffer *leaf;
- u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
+ u32 size = sizeof(*extent_item) + sizeof(*iref);
+ bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+ SKINNY_METADATA);
+
+ if (!skinny_metadata)
+ size += sizeof(*block_info);
path = btrfs_alloc_path();
if (!path)
@@ -6327,12 +6498,16 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_set_extent_generation(leaf, extent_item, trans->transid);
btrfs_set_extent_flags(leaf, extent_item,
flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
- block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
- btrfs_set_tree_block_key(leaf, block_info, key);
- btrfs_set_tree_block_level(leaf, block_info, level);
+ if (skinny_metadata) {
+ iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+ } else {
+ block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+ btrfs_set_tree_block_key(leaf, block_info, key);
+ btrfs_set_tree_block_level(leaf, block_info, level);
+ iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+ }
- iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
if (parent > 0) {
BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
btrfs_set_extent_inline_ref_type(leaf, iref,
@@ -6347,11 +6522,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
- ret = update_block_group(root, ins->objectid, ins->offset, 1);
+ ret = update_block_group(root, ins->objectid, root->leafsize, 1);
if (ret) { /* -ENOENT, logic error */
- printk(KERN_ERR "btrfs update block group failed for %llu "
- "%llu\n", (unsigned long long)ins->objectid,
- (unsigned long long)ins->offset);
+ btrfs_err(fs_info, "update block group failed for %llu %llu",
+ (unsigned long long)ins->objectid,
+ (unsigned long long)ins->offset);
BUG();
}
return ret;
@@ -6396,47 +6571,48 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
if (!caching_ctl) {
BUG_ON(!block_group_cache_done(block_group));
ret = btrfs_remove_free_space(block_group, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out;
} else {
mutex_lock(&caching_ctl->mutex);
if (start >= caching_ctl->progress) {
ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
} else if (start + num_bytes <= caching_ctl->progress) {
ret = btrfs_remove_free_space(block_group,
start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
} else {
num_bytes = caching_ctl->progress - start;
ret = btrfs_remove_free_space(block_group,
start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_lock;
start = caching_ctl->progress;
num_bytes = ins->objectid + ins->offset -
caching_ctl->progress;
ret = add_excluded_extent(root, start, num_bytes);
- BUG_ON(ret); /* -ENOMEM */
}
-
+out_lock:
mutex_unlock(&caching_ctl->mutex);
put_caching_control(caching_ctl);
+ if (ret)
+ goto out;
}
ret = btrfs_update_reserved_bytes(block_group, ins->offset,
RESERVE_ALLOC_NO_ACCOUNT);
BUG_ON(ret); /* logic error */
- btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
+out:
+ btrfs_put_block_group(block_group);
return ret;
}
-struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- int level)
+static struct extent_buffer *
+btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ u64 bytenr, u32 blocksize, int level)
{
struct extent_buffer *buf;
@@ -6479,51 +6655,51 @@ use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_block_rsv *block_rsv;
struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
int ret;
+ bool global_updated = false;
block_rsv = get_block_rsv(trans, root);
- if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- /*
- * If we couldn't reserve metadata bytes try and use some from
- * the global reserve.
- */
- if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- return ERR_PTR(ret);
- } else if (ret) {
- return ERR_PTR(ret);
- }
+ if (unlikely(block_rsv->size == 0))
+ goto try_reserve;
+again:
+ ret = block_rsv_use_bytes(block_rsv, blocksize);
+ if (!ret)
return block_rsv;
+
+ if (block_rsv->failfast)
+ return ERR_PTR(ret);
+
+ if (block_rsv->type == BTRFS_BLOCK_RSV_GLOBAL && !global_updated) {
+ global_updated = true;
+ update_global_block_rsv(root->fs_info);
+ goto again;
}
- ret = block_rsv_use_bytes(block_rsv, blocksize);
+ if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ static DEFINE_RATELIMIT_STATE(_rs,
+ DEFAULT_RATELIMIT_INTERVAL * 10,
+ /*DEFAULT_RATELIMIT_BURST*/ 1);
+ if (__ratelimit(&_rs))
+ WARN(1, KERN_DEBUG
+ "btrfs: block rsv returned %d\n", ret);
+ }
+try_reserve:
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize,
+ BTRFS_RESERVE_NO_FLUSH);
if (!ret)
return block_rsv;
- if (ret && !block_rsv->failfast) {
- if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL * 10,
- /*DEFAULT_RATELIMIT_BURST*/ 1);
- if (__ratelimit(&_rs))
- WARN(1, KERN_DEBUG
- "btrfs: block rsv returned %d\n", ret);
- }
- ret = reserve_metadata_bytes(root, block_rsv, blocksize,
- BTRFS_RESERVE_NO_FLUSH);
- if (!ret) {
- return block_rsv;
- } else if (ret && block_rsv != global_rsv) {
- ret = block_rsv_use_bytes(global_rsv, blocksize);
- if (!ret)
- return global_rsv;
- }
+ /*
+ * If we couldn't reserve metadata bytes try and use some from
+ * the global reserve if its space type is the same as the global
+ * reservation.
+ */
+ if (block_rsv->type != BTRFS_BLOCK_RSV_GLOBAL &&
+ block_rsv->space_info == global_rsv->space_info) {
+ ret = block_rsv_use_bytes(global_rsv, blocksize);
+ if (!ret)
+ return global_rsv;
}
-
- return ERR_PTR(-ENOSPC);
+ return ERR_PTR(ret);
}
static void unuse_block_rsv(struct btrfs_fs_info *fs_info,
@@ -6551,7 +6727,8 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct extent_buffer *buf;
u64 flags = 0;
int ret;
-
+ bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
+ SKINNY_METADATA);
block_rsv = use_block_rsv(trans, root, blocksize);
if (IS_ERR(block_rsv))
@@ -6584,9 +6761,13 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
- extent_op->update_key = 1;
+ if (skinny_metadata)
+ extent_op->update_key = 0;
+ else
+ extent_op->update_key = 1;
extent_op->update_flags = 1;
extent_op->is_data = 0;
+ extent_op->level = level;
ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
ins.objectid,
@@ -6661,8 +6842,9 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
continue;
/* We don't lock the tree block, it's OK to be racy here */
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
- &refs, &flags);
+ ret = btrfs_lookup_extent_info(trans, root, bytenr,
+ wc->level - 1, 1, &refs,
+ &flags);
/* We don't care about errors in readahead. */
if (ret < 0)
continue;
@@ -6729,7 +6911,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
(wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
BUG_ON(!path->locks[level]);
ret = btrfs_lookup_extent_info(trans, root,
- eb->start, eb->len,
+ eb->start, level, 1,
&wc->refs[level],
&wc->flags[level]);
BUG_ON(ret == -ENOMEM);
@@ -6757,7 +6939,8 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
- eb->len, flag, 0);
+ eb->len, flag,
+ btrfs_header_level(eb), 0);
BUG_ON(ret); /* -ENOMEM */
wc->flags[level] |= flag;
}
@@ -6827,7 +7010,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
- ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+ ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
&wc->refs[level - 1],
&wc->flags[level - 1]);
if (ret < 0) {
@@ -6835,7 +7018,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
return ret;
}
- BUG_ON(wc->refs[level - 1] == 0);
+ if (unlikely(wc->refs[level - 1] == 0)) {
+ btrfs_err(root->fs_info, "Missing references.");
+ BUG();
+ }
*lookup_info = 0;
if (wc->stage == DROP_REFERENCE) {
@@ -6874,8 +7060,10 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
if (reada && level == 1)
reada_walk_down(trans, root, wc, path);
next = read_tree_block(root, bytenr, blocksize, generation);
- if (!next)
+ if (!next || !extent_buffer_uptodate(next)) {
+ free_extent_buffer(next);
return -EIO;
+ }
btrfs_tree_lock(next);
btrfs_set_lock_blocking(next);
}
@@ -6958,7 +7146,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
- eb->start, eb->len,
+ eb->start, level, 1,
&wc->refs[level],
&wc->flags[level]);
if (ret < 0) {
@@ -7094,6 +7282,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* reference count by one. if update_ref is true, this function
* also make sure backrefs for the shared block and all lower level
* blocks are properly updated.
+ *
+ * If called with for_reloc == 0, may exit early with -EAGAIN
*/
int btrfs_drop_snapshot(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv, int update_ref,
@@ -7168,8 +7358,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
ret = btrfs_lookup_extent_info(trans, root,
path->nodes[level]->start,
- path->nodes[level]->len,
- &wc->refs[level],
+ level, 1, &wc->refs[level],
&wc->flags[level]);
if (ret < 0) {
err = ret;
@@ -7195,6 +7384,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
+ if (!for_reloc && btrfs_fs_closing(root->fs_info)) {
+ pr_debug("btrfs: drop snapshot early exit\n");
+ err = -EAGAIN;
+ goto out_end_trans;
+ }
+
ret = walk_down_tree(trans, root, path, wc);
if (ret < 0) {
err = ret;
@@ -7944,7 +8139,17 @@ int btrfs_read_block_groups(struct btrfs_root *root)
* info has super bytes accounted for, otherwise we'll think
* we have more space than we actually do.
*/
- exclude_super_stripes(root, cache);
+ ret = exclude_super_stripes(root, cache);
+ if (ret) {
+ /*
+ * We may have excluded something, so call this just in
+ * case.
+ */
+ free_excluded_extents(root, cache);
+ kfree(cache->free_space_ctl);
+ kfree(cache);
+ goto error;
+ }
/*
* check for two cases, either we are full, and therefore
@@ -7967,10 +8172,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
free_excluded_extents(root, cache);
}
+ ret = btrfs_add_block_group_cache(root->fs_info, cache);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ goto error;
+ }
+
ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item),
&space_info);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ spin_lock(&info->block_group_cache_lock);
+ rb_erase(&cache->cache_node,
+ &info->block_group_cache_tree);
+ spin_unlock(&info->block_group_cache_lock);
+ btrfs_put_block_group(cache);
+ goto error;
+ }
+
cache->space_info = space_info;
spin_lock(&cache->space_info->lock);
cache->space_info->bytes_readonly += cache->bytes_super;
@@ -7978,9 +8199,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
__link_block_group(space_info, cache);
- ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret); /* Logic error */
-
set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
set_block_group_ro(cache, 1);
@@ -8086,16 +8304,41 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- exclude_super_stripes(root, cache);
+ ret = exclude_super_stripes(root, cache);
+ if (ret) {
+ /*
+ * We may have excluded something, so call this just in
+ * case.
+ */
+ free_excluded_extents(root, cache);
+ kfree(cache->free_space_ctl);
+ kfree(cache);
+ return ret;
+ }
add_new_free_space(cache, root->fs_info, chunk_offset,
chunk_offset + size);
free_excluded_extents(root, cache);
+ ret = btrfs_add_block_group_cache(root->fs_info, cache);
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
+
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ btrfs_remove_free_space_cache(cache);
+ spin_lock(&root->fs_info->block_group_cache_lock);
+ rb_erase(&cache->cache_node,
+ &root->fs_info->block_group_cache_tree);
+ spin_unlock(&root->fs_info->block_group_cache_lock);
+ btrfs_put_block_group(cache);
+ return ret;
+ }
update_global_block_rsv(root->fs_info);
spin_lock(&cache->space_info->lock);
@@ -8104,9 +8347,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
__link_block_group(cache->space_info, cache);
- ret = btrfs_add_block_group_cache(root->fs_info, cache);
- BUG_ON(ret); /* Logic error */
-
list_add_tail(&cache->new_bg_list, &trans->new_bgs);
set_avail_alloc_bits(extent_root->fs_info, type);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f173c5af6461..6bca9472f313 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -23,13 +23,64 @@
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
+static struct bio_set *btrfs_bioset;
+#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
static LIST_HEAD(states);
-#define LEAK_DEBUG 0
-#if LEAK_DEBUG
static DEFINE_SPINLOCK(leak_lock);
+
+static inline
+void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&leak_lock, flags);
+ list_add(new, head);
+ spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_del(struct list_head *entry)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&leak_lock, flags);
+ list_del(entry);
+ spin_unlock_irqrestore(&leak_lock, flags);
+}
+
+static inline
+void btrfs_leak_debug_check(void)
+{
+ struct extent_state *state;
+ struct extent_buffer *eb;
+
+ while (!list_empty(&states)) {
+ state = list_entry(states.next, struct extent_state, leak_list);
+ printk(KERN_ERR "btrfs state leak: start %llu end %llu "
+ "state %lu in tree %p refs %d\n",
+ (unsigned long long)state->start,
+ (unsigned long long)state->end,
+ state->state, state->tree, atomic_read(&state->refs));
+ list_del(&state->leak_list);
+ kmem_cache_free(extent_state_cache, state);
+ }
+
+ while (!list_empty(&buffers)) {
+ eb = list_entry(buffers.next, struct extent_buffer, leak_list);
+ printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
+ "refs %d\n", (unsigned long long)eb->start,
+ eb->len, atomic_read(&eb->refs));
+ list_del(&eb->leak_list);
+ kmem_cache_free(extent_buffer_cache, eb);
+ }
+}
+#else
+#define btrfs_leak_debug_add(new, head) do {} while (0)
+#define btrfs_leak_debug_del(entry) do {} while (0)
+#define btrfs_leak_debug_check() do {} while (0)
#endif
#define BUFFER_LRU_MAX 64
@@ -75,38 +126,26 @@ int __init extent_io_init(void)
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;
+
+ btrfs_bioset = bioset_create(BIO_POOL_SIZE,
+ offsetof(struct btrfs_io_bio, bio));
+ if (!btrfs_bioset)
+ goto free_buffer_cache;
return 0;
+free_buffer_cache:
+ kmem_cache_destroy(extent_buffer_cache);
+ extent_buffer_cache = NULL;
+
free_state_cache:
kmem_cache_destroy(extent_state_cache);
+ extent_state_cache = NULL;
return -ENOMEM;
}
void extent_io_exit(void)
{
- struct extent_state *state;
- struct extent_buffer *eb;
-
- while (!list_empty(&states)) {
- state = list_entry(states.next, struct extent_state, leak_list);
- printk(KERN_ERR "btrfs state leak: start %llu end %llu "
- "state %lu in tree %p refs %d\n",
- (unsigned long long)state->start,
- (unsigned long long)state->end,
- state->state, state->tree, atomic_read(&state->refs));
- list_del(&state->leak_list);
- kmem_cache_free(extent_state_cache, state);
-
- }
-
- while (!list_empty(&buffers)) {
- eb = list_entry(buffers.next, struct extent_buffer, leak_list);
- printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
- "refs %d\n", (unsigned long long)eb->start,
- eb->len, atomic_read(&eb->refs));
- list_del(&eb->leak_list);
- kmem_cache_free(extent_buffer_cache, eb);
- }
+ btrfs_leak_debug_check();
/*
* Make sure all delayed rcu free are flushed before we
@@ -117,6 +156,8 @@ void extent_io_exit(void)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
kmem_cache_destroy(extent_buffer_cache);
+ if (btrfs_bioset)
+ bioset_free(btrfs_bioset);
}
void extent_io_tree_init(struct extent_io_tree *tree,
@@ -134,9 +175,6 @@ void extent_io_tree_init(struct extent_io_tree *tree,
static struct extent_state *alloc_extent_state(gfp_t mask)
{
struct extent_state *state;
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
state = kmem_cache_alloc(extent_state_cache, mask);
if (!state)
@@ -144,11 +182,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
state->state = 0;
state->private = 0;
state->tree = NULL;
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_add(&state->leak_list, &states);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+ btrfs_leak_debug_add(&state->leak_list, &states);
atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq);
trace_alloc_extent_state(state, mask, _RET_IP_);
@@ -160,15 +194,8 @@ void free_extent_state(struct extent_state *state)
if (!state)
return;
if (atomic_dec_and_test(&state->refs)) {
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
WARN_ON(state->tree);
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_del(&state->leak_list);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+ btrfs_leak_debug_del(&state->leak_list);
trace_free_extent_state(state, _RET_IP_);
kmem_cache_free(extent_state_cache, state);
}
@@ -308,21 +335,21 @@ static void merge_state(struct extent_io_tree *tree,
}
static void set_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, int *bits)
+ struct extent_state *state, unsigned long *bits)
{
if (tree->ops && tree->ops->set_bit_hook)
tree->ops->set_bit_hook(tree->mapping->host, state, bits);
}
static void clear_state_cb(struct extent_io_tree *tree,
- struct extent_state *state, int *bits)
+ struct extent_state *state, unsigned long *bits)
{
if (tree->ops && tree->ops->clear_bit_hook)
tree->ops->clear_bit_hook(tree->mapping->host, state, bits);
}
static void set_state_bits(struct extent_io_tree *tree,
- struct extent_state *state, int *bits);
+ struct extent_state *state, unsigned long *bits);
/*
* insert an extent_state struct into the tree. 'bits' are set on the
@@ -336,7 +363,7 @@ static void set_state_bits(struct extent_io_tree *tree,
*/
static int insert_state(struct extent_io_tree *tree,
struct extent_state *state, u64 start, u64 end,
- int *bits)
+ unsigned long *bits)
{
struct rb_node *node;
@@ -424,10 +451,10 @@ static struct extent_state *next_state(struct extent_state *state)
*/
static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state,
- int *bits, int wake)
+ unsigned long *bits, int wake)
{
struct extent_state *next;
- int bits_to_clear = *bits & ~EXTENT_CTLBITS;
+ unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1;
@@ -463,7 +490,7 @@ alloc_extent_state_atomic(struct extent_state *prealloc)
return prealloc;
}
-void extent_io_tree_panic(struct extent_io_tree *tree, int err)
+static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
{
btrfs_panic(tree_fs_info(tree), err, "Locking error: "
"Extent tree was modified by another "
@@ -483,7 +510,7 @@ void extent_io_tree_panic(struct extent_io_tree *tree, int err)
* This takes the tree lock, and returns 0 on success and < 0 on error.
*/
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete,
+ unsigned long bits, int wake, int delete,
struct extent_state **cached_state,
gfp_t mask)
{
@@ -644,7 +671,8 @@ static void wait_on_state(struct extent_io_tree *tree,
* The range [start, end] is inclusive.
* The tree lock is taken by this function
*/
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+static void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned long bits)
{
struct extent_state *state;
struct rb_node *node;
@@ -685,9 +713,9 @@ out:
static void set_state_bits(struct extent_io_tree *tree,
struct extent_state *state,
- int *bits)
+ unsigned long *bits)
{
- int bits_to_set = *bits & ~EXTENT_CTLBITS;
+ unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS;
set_state_cb(tree, state, bits);
if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
@@ -730,8 +758,9 @@ static void uncache_state(struct extent_state **cached_ptr)
static int __must_check
__set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int exclusive_bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask)
+ unsigned long bits, unsigned long exclusive_bits,
+ u64 *failed_start, struct extent_state **cached_state,
+ gfp_t mask)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@@ -923,9 +952,9 @@ search_again:
goto again;
}
-int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
- u64 *failed_start, struct extent_state **cached_state,
- gfp_t mask)
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ unsigned long bits, u64 * failed_start,
+ struct extent_state **cached_state, gfp_t mask)
{
return __set_extent_bit(tree, start, end, bits, 0, failed_start,
cached_state, mask);
@@ -950,7 +979,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
* boundary bits like LOCK.
*/
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int clear_bits,
+ unsigned long bits, unsigned long clear_bits,
struct extent_state **cached_state, gfp_t mask)
{
struct extent_state *state;
@@ -1143,14 +1172,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
}
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask)
+ unsigned long bits, gfp_t mask)
{
return set_extent_bit(tree, start, end, bits, NULL,
NULL, mask);
}
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask)
+ unsigned long bits, gfp_t mask)
{
return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
}
@@ -1189,7 +1218,7 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask)
{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
cached_state, mask);
}
@@ -1205,7 +1234,7 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
* us if waiting is desired.
*/
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached_state)
+ unsigned long bits, struct extent_state **cached_state)
{
int err;
u64 failed_start;
@@ -1257,6 +1286,39 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
GFP_NOFS);
}
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(inode->i_mapping, index);
+ BUG_ON(!page); /* Pages should be in the extent_io_tree */
+ clear_page_dirty_for_io(page);
+ page_cache_release(page);
+ index++;
+ }
+ return 0;
+}
+
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+{
+ unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ struct page *page;
+
+ while (index <= end_index) {
+ page = find_get_page(inode->i_mapping, index);
+ BUG_ON(!page); /* Pages should be in the extent_io_tree */
+ account_page_redirty(page);
+ __set_page_dirty_nobuffers(page);
+ page_cache_release(page);
+ index++;
+ }
+ return 0;
+}
+
/*
* helper function to set both pages and extents in the tree writeback
*/
@@ -1280,8 +1342,9 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
* return it. tree->lock must be held. NULL will returned if
* nothing was found after 'start'
*/
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
- u64 start, int bits)
+static struct extent_state *
+find_first_extent_bit_state(struct extent_io_tree *tree,
+ u64 start, unsigned long bits)
{
struct rb_node *node;
struct extent_state *state;
@@ -1315,7 +1378,7 @@ out:
* If nothing was found, 1 is returned. If found something, return 0.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits,
+ u64 *start_ret, u64 *end_ret, unsigned long bits,
struct extent_state **cached_state)
{
struct extent_state *state;
@@ -1605,7 +1668,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
unsigned long end_index = end >> PAGE_CACHE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
- int clear_bits = 0;
+ unsigned long clear_bits = 0;
if (op & EXTENT_CLEAR_UNLOCK)
clear_bits |= EXTENT_LOCKED;
@@ -1744,6 +1807,64 @@ out:
return ret;
}
+void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+ int count)
+{
+ struct rb_node *node;
+ struct extent_state *state;
+
+ spin_lock(&tree->lock);
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = tree_search(tree, start);
+ BUG_ON(!node);
+
+ state = rb_entry(node, struct extent_state, rb_node);
+ BUG_ON(state->start != start);
+
+ while (count) {
+ state->private = *csums++;
+ count--;
+ state = next_state(state);
+ }
+ spin_unlock(&tree->lock);
+}
+
+static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
+{
+ struct bio_vec *bvec = bio->bi_io_vec + bio_index;
+
+ return page_offset(bvec->bv_page) + bvec->bv_offset;
+}
+
+void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
+ u32 csums[], int count)
+{
+ struct rb_node *node;
+ struct extent_state *state = NULL;
+ u64 start;
+
+ spin_lock(&tree->lock);
+ do {
+ start = __btrfs_get_bio_offset(bio, bio_index);
+ if (state == NULL || state->start != start) {
+ node = tree_search(tree, start);
+ BUG_ON(!node);
+
+ state = rb_entry(node, struct extent_state, rb_node);
+ BUG_ON(state->start != start);
+ }
+ state->private = *csums++;
+ count--;
+ bio_index++;
+
+ state = next_state(state);
+ } while (count);
+ spin_unlock(&tree->lock);
+}
+
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
{
struct rb_node *node;
@@ -1778,7 +1899,7 @@ out:
* range is found set.
*/
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled, struct extent_state *cached)
+ unsigned long bits, int filled, struct extent_state *cached)
{
struct extent_state *state = NULL;
struct rb_node *node;
@@ -1840,28 +1961,6 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
}
/*
- * helper function to unlock a page if all the extents in the tree
- * for that page are unlocked
- */
-static void check_page_locked(struct extent_io_tree *tree, struct page *page)
-{
- u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
- if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
- unlock_page(page);
-}
-
-/*
- * helper function to end page writeback if all the extents
- * in the tree for that page are done with writeback
- */
-static void check_page_writeback(struct extent_io_tree *tree,
- struct page *page)
-{
- end_page_writeback(page);
-}
-
-/*
* When IO fails, either with EIO or csum verification fails, we
* try other mirrors that might have a good copy of the data. This
* io_failure_record is used to record state as we go through all the
@@ -1938,7 +2037,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_private = &compl;
@@ -2228,7 +2327,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
free_io_failure(inode, failrec, 0);
return -EIO;
@@ -2290,19 +2389,24 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
do {
struct page *page = bvec->bv_page;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page write in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (--bvec >= bio->bi_io_vec)
prefetchw(&bvec->bv_page->flags);
@@ -2310,10 +2414,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
if (end_extent_writepage(page, err, start, end))
continue;
- if (whole_page)
- end_page_writeback(page);
- else
- check_page_writeback(tree, page);
+ end_page_writeback(page);
} while (bvec >= bio->bi_io_vec);
bio_put(bio);
@@ -2338,7 +2439,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct extent_io_tree *tree;
u64 start;
u64 end;
- int whole_page;
int mirror;
int ret;
@@ -2349,19 +2449,26 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
+ struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
- "mirror=%ld\n", (u64)bio->bi_sector, err,
- (long int)bio->bi_bdev);
+ "mirror=%lu\n", (u64)bio->bi_sector, err,
+ io_bio->mirror_num);
tree = &BTRFS_I(page->mapping->host)->io_tree;
- start = page_offset(page) + bvec->bv_offset;
- end = start + bvec->bv_len - 1;
+ /* We always issue full-page reads, but if some block
+ * in a page fails to read, blk_update_request() will
+ * advance bv_offset and adjust bv_len to compensate.
+ * Print a warning for nonzero offsets, and an error
+ * if they don't add up to a full page. */
+ if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE)
+ printk("%s page read in btrfs with offset %u and length %u\n",
+ bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE
+ ? KERN_ERR "partial" : KERN_INFO "incomplete",
+ bvec->bv_offset, bvec->bv_len);
- if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
- whole_page = 1;
- else
- whole_page = 0;
+ start = page_offset(page);
+ end = start + bvec->bv_offset + bvec->bv_len - 1;
if (++bvec <= bvec_end)
prefetchw(&bvec->bv_page->flags);
@@ -2377,7 +2484,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);
- mirror = (int)(unsigned long)bio->bi_bdev;
+ mirror = io_bio->mirror_num;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
@@ -2420,39 +2527,35 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
- if (whole_page) {
- if (uptodate) {
- SetPageUptodate(page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- unlock_page(page);
+ if (uptodate) {
+ SetPageUptodate(page);
} else {
- if (uptodate) {
- check_page_uptodate(tree, page);
- } else {
- ClearPageUptodate(page);
- SetPageError(page);
- }
- check_page_locked(tree, page);
+ ClearPageUptodate(page);
+ SetPageError(page);
}
+ unlock_page(page);
} while (bvec <= bvec_end);
bio_put(bio);
}
+/*
+ * this allocates from the btrfs_bioset. We're returning a bio right now
+ * but you can call btrfs_io_bio for the appropriate container_of magic
+ */
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;
- bio = bio_alloc(gfp_flags, nr_vecs);
+ bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
if (bio == NULL && (current->flags & PF_MEMALLOC)) {
- while (!bio && (nr_vecs /= 2))
- bio = bio_alloc(gfp_flags, nr_vecs);
+ while (!bio && (nr_vecs /= 2)) {
+ bio = bio_alloc_bioset(gfp_flags,
+ nr_vecs, btrfs_bioset);
+ }
}
if (bio) {
@@ -2463,6 +2566,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
+{
+ return bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
+}
+
+
+/* this also allocates from the btrfs_bioset */
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
+{
+ return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+}
+
+
static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
@@ -2527,8 +2643,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (old_compressed)
contig = bio->bi_sector == sector;
else
- contig = bio->bi_sector + (bio->bi_size >> 9) ==
- sector;
+ contig = bio_end_sector(bio) == sector;
if (prev_bio_flags != bio_flags || !contig ||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
@@ -2563,7 +2678,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
return ret;
}
-void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page)
+static void attach_extent_buffer_page(struct extent_buffer *eb,
+ struct page *page)
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
@@ -2593,7 +2709,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags)
+ unsigned long *bio_flags, int rw)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
@@ -2739,7 +2855,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
pnr -= page->index;
- ret = submit_extent_page(READ, tree, page,
+ ret = submit_extent_page(rw, tree, page,
sector, disk_io_size, pg_offset,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
@@ -2772,7 +2888,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
- &bio_flags);
+ &bio_flags, READ);
if (bio)
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
@@ -2841,7 +2957,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0);
+ page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
unlock_page(page);
return 0;
}
@@ -3071,7 +3187,7 @@ static int eb_wait(void *word)
return 0;
}
-static void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
+void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
{
wait_on_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK, eb_wait,
TASK_UNINTERRUPTIBLE);
@@ -3196,7 +3312,7 @@ static int write_one_eb(struct extent_buffer *eb,
u64 offset = eb->start;
unsigned long i, num_pages;
unsigned long bio_flags = 0;
- int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
+ int rw = (epd->sync_io ? WRITE_SYNC : WRITE) | REQ_META;
int ret = 0;
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
@@ -3633,14 +3749,14 @@ int extent_readpages(struct extent_io_tree *tree,
continue;
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags);
+ &bio, 0, &bio_flags, READ);
page_cache_release(pagepool[i]);
}
nr = 0;
}
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
- &bio, 0, &bio_flags);
+ &bio, 0, &bio_flags, READ);
page_cache_release(pagepool[i]);
}
@@ -3681,9 +3797,9 @@ int extent_invalidatepage(struct extent_io_tree *tree,
* are locked or under IO and drops the related state bits if it is safe
* to drop the page.
*/
-int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask)
+static int try_release_extent_state(struct extent_map_tree *map,
+ struct extent_io_tree *tree,
+ struct page *page, gfp_t mask)
{
u64 start = page_offset(page);
u64 end = start + PAGE_CACHE_SIZE - 1;
@@ -3880,7 +3996,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
last_for_get_extent = isize;
}
- lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
&cached_state);
em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -3967,19 +4083,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
out_free:
free_extent_map(em);
out:
- unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state, GFP_NOFS);
return ret;
}
static void __free_extent_buffer(struct extent_buffer *eb)
{
-#if LEAK_DEBUG
- unsigned long flags;
- spin_lock_irqsave(&leak_lock, flags);
- list_del(&eb->leak_list);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+ btrfs_leak_debug_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
@@ -3989,9 +4100,6 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
gfp_t mask)
{
struct extent_buffer *eb = NULL;
-#if LEAK_DEBUG
- unsigned long flags;
-#endif
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
if (eb == NULL)
@@ -4011,11 +4119,8 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
-#if LEAK_DEBUG
- spin_lock_irqsave(&leak_lock, flags);
- list_add(&eb->leak_list, &buffers);
- spin_unlock_irqrestore(&leak_lock, flags);
-#endif
+ btrfs_leak_debug_add(&eb->leak_list, &buffers);
+
spin_lock_init(&eb->refs_lock);
atomic_set(&eb->refs, 1);
atomic_set(&eb->io_pages, 0);
@@ -4353,7 +4458,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
}
/* Expects to have eb->eb_lock already held */
-static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
+static int release_extent_buffer(struct extent_buffer *eb)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
@@ -4411,7 +4516,7 @@ void free_extent_buffer(struct extent_buffer *eb)
* I know this is terrible, but it's temporary until we stop tracking
* the uptodate bits and such for the extent buffers.
*/
- release_extent_buffer(eb, GFP_ATOMIC);
+ release_extent_buffer(eb);
}
void free_extent_buffer_stale(struct extent_buffer *eb)
@@ -4425,7 +4530,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb)
if (atomic_read(&eb->refs) == 2 && !extent_buffer_under_io(eb) &&
test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_dec(&eb->refs);
- release_extent_buffer(eb, GFP_NOFS);
+ release_extent_buffer(eb);
}
void clear_extent_buffer_dirty(struct extent_buffer *eb)
@@ -4477,17 +4582,6 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
return was_dirty;
}
-static int range_straddles_pages(u64 start, u64 len)
-{
- if (len < PAGE_CACHE_SIZE)
- return 1;
- if (start & (PAGE_CACHE_SIZE - 1))
- return 1;
- if ((start + len) & (PAGE_CACHE_SIZE - 1))
- return 1;
- return 0;
-}
-
int clear_extent_buffer_uptodate(struct extent_buffer *eb)
{
unsigned long i;
@@ -4519,37 +4613,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
return 0;
}
-int extent_range_uptodate(struct extent_io_tree *tree,
- u64 start, u64 end)
-{
- struct page *page;
- int ret;
- int pg_uptodate = 1;
- int uptodate;
- unsigned long index;
-
- if (range_straddles_pages(start, end - start + 1)) {
- ret = test_range_bit(tree, start, end,
- EXTENT_UPTODATE, 1, NULL);
- if (ret)
- return 1;
- }
- while (start <= end) {
- index = start >> PAGE_CACHE_SHIFT;
- page = find_get_page(tree->mapping, index);
- if (!page)
- return 1;
- uptodate = PageUptodate(page);
- page_cache_release(page);
- if (!uptodate) {
- pg_uptodate = 0;
- break;
- }
- start += PAGE_CACHE_SIZE;
- }
- return pg_uptodate;
-}
-
int extent_buffer_uptodate(struct extent_buffer *eb)
{
return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -4612,7 +4675,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
- mirror_num, &bio_flags);
+ mirror_num, &bio_flags,
+ READ | REQ_META);
if (err)
ret = err;
} else {
@@ -4621,7 +4685,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
if (bio) {
- err = submit_one_bio(READ, bio, mirror_num, bio_flags);
+ err = submit_one_bio(READ | REQ_META, bio, mirror_num,
+ bio_flags);
if (err)
return err;
}
@@ -4985,7 +5050,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
}
}
-int try_release_extent_buffer(struct page *page, gfp_t mask)
+int try_release_extent_buffer(struct page *page)
{
struct extent_buffer *eb;
@@ -5015,9 +5080,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
}
spin_unlock(&page->mapping->private_lock);
- if ((mask & GFP_NOFS) == GFP_NOFS)
- mask = GFP_NOFS;
-
/*
* If tree ref isn't set then we know the ref on this eb is a real ref,
* so just return, this page will likely be freed soon anyway.
@@ -5027,5 +5089,5 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
return 0;
}
- return release_extent_buffer(eb, mask);
+ return release_extent_buffer(eb);
}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 6068a1985560..41fb81e7ec53 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -81,9 +81,9 @@ struct extent_io_ops {
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
+ unsigned long *bits);
void (*clear_bit_hook)(struct inode *inode, struct extent_state *state,
- int *bits);
+ unsigned long *bits);
void (*merge_extent_hook)(struct inode *inode,
struct extent_state *new,
struct extent_state *other);
@@ -116,7 +116,9 @@ struct extent_state {
/* for use by the FS */
u64 private;
+#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
+#endif
};
#define INLINE_EXTENT_BUFFER_PAGES 16
@@ -132,7 +134,6 @@ struct extent_buffer {
atomic_t refs;
atomic_t io_pages;
int read_mirror;
- struct list_head leak_list;
struct rcu_head rcu_head;
pid_t lock_owner;
@@ -159,6 +160,9 @@ struct extent_buffer {
wait_queue_head_t read_lock_wq;
wait_queue_head_t lock_wq;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
+#ifdef CONFIG_BTRFS_DEBUG
+ struct list_head leak_list;
+#endif
};
static inline void extent_set_compress_type(unsigned long *bio_flags,
@@ -185,13 +189,10 @@ void extent_io_tree_init(struct extent_io_tree *tree,
int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page,
gfp_t mask);
-int try_release_extent_buffer(struct page *page, gfp_t mask);
-int try_release_extent_state(struct extent_map_tree *map,
- struct extent_io_tree *tree, struct page *page,
- gfp_t mask);
+int try_release_extent_buffer(struct page *page);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, struct extent_state **cached);
+ unsigned long bits, struct extent_state **cached);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached, gfp_t mask);
@@ -207,16 +208,17 @@ u64 count_range_bits(struct extent_io_tree *tree,
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int filled, struct extent_state *cached_state);
+ unsigned long bits, int filled,
+ struct extent_state *cached_state);
int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
+ unsigned long bits, gfp_t mask);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int wake, int delete, struct extent_state **cached,
- gfp_t mask);
+ unsigned long bits, int wake, int delete,
+ struct extent_state **cached, gfp_t mask);
int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, gfp_t mask);
+ unsigned long bits, gfp_t mask);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, u64 *failed_start,
+ unsigned long bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask);
int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
@@ -229,17 +231,15 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
gfp_t mask);
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- int bits, int clear_bits,
+ unsigned long bits, unsigned long clear_bits,
struct extent_state **cached_state, gfp_t mask);
int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached_state, gfp_t mask);
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, int bits,
+ u64 *start_ret, u64 *end_ret, unsigned long bits,
struct extent_state **cached_state);
-struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
- u64 start, int bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
@@ -261,6 +261,10 @@ int extent_readpages(struct extent_io_tree *tree,
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
+void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
+ int count);
+void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
+ int bvec_index, u32 csums[], int count);
int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
@@ -278,6 +282,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, u64 start, int wait,
get_extent_t *get_extent, int mirror_num);
+void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
@@ -313,7 +318,6 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
unsigned long src_offset, unsigned long len);
void memset_extent_buffer(struct extent_buffer *eb, char c,
unsigned long start, unsigned long len);
-void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits);
void clear_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_dirty(struct extent_buffer *eb);
int set_extent_buffer_uptodate(struct extent_buffer *eb);
@@ -323,8 +327,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
unsigned long min_len, char **map,
unsigned long *map_start,
unsigned long *map_len);
-int extent_range_uptodate(struct extent_io_tree *tree,
- u64 start, u64 end);
+int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
int extent_clear_unlock_delalloc(struct inode *inode,
struct extent_io_tree *tree,
u64 start, u64 end, struct page *locked_page,
@@ -332,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
+struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
+struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);
struct btrfs_fs_info;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 2834ca5768ea..a4a7a1a8da95 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -174,6 +174,14 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
test_bit(EXTENT_FLAG_LOGGING, &next->flags))
return 0;
+ /*
+ * We don't want to merge stuff that hasn't been written to the log yet
+ * since it may not reflect exactly what is on disk, and that would be
+ * bad.
+ */
+ if (!list_empty(&prev->list) || !list_empty(&next->list))
+ return 0;
+
if (extent_map_end(prev) == next->start &&
prev->flags == next->flags &&
prev->bdev == next->bdev &&
@@ -209,9 +217,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
em->mod_start = merge->mod_start;
em->generation = max(em->generation, merge->generation);
- list_move(&em->list, &tree->modified_extents);
- list_del_init(&merge->list);
rb_erase(&merge->rb_node, &tree->map);
free_extent_map(merge);
}
@@ -227,7 +233,6 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
merge->in_tree = 0;
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
em->generation = max(em->generation, merge->generation);
- list_del_init(&merge->list);
free_extent_map(merge);
}
}
@@ -302,7 +307,7 @@ void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
* reference dropped if the merge attempt was successful.
*/
int add_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *em)
+ struct extent_map *em, int modified)
{
int ret = 0;
struct rb_node *rb;
@@ -324,7 +329,10 @@ int add_extent_mapping(struct extent_map_tree *tree,
em->mod_start = em->start;
em->mod_len = em->len;
- try_merge_map(tree, em);
+ if (modified)
+ list_move(&em->list, &tree->modified_extents);
+ else
+ try_merge_map(tree, em);
out:
return ret;
}
@@ -337,8 +345,9 @@ static u64 range_end(u64 start, u64 len)
return start + len;
}
-struct extent_map *__lookup_extent_mapping(struct extent_map_tree *tree,
- u64 start, u64 len, int strict)
+static struct extent_map *
+__lookup_extent_mapping(struct extent_map_tree *tree,
+ u64 start, u64 len, int strict)
{
struct extent_map *em;
struct rb_node *rb_node;
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index c6598c89cff8..61adc44b7805 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,6 +26,7 @@ struct extent_map {
u64 mod_len;
u64 orig_start;
u64 orig_block_len;
+ u64 ram_bytes;
u64 block_start;
u64 block_len;
u64 generation;
@@ -61,7 +62,7 @@ void extent_map_tree_init(struct extent_map_tree *tree);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
int add_extent_mapping(struct extent_map_tree *tree,
- struct extent_map *em);
+ struct extent_map *em, int modified);
int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
struct extent_map *alloc_extent_map(void);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index ec160202be3e..b193bf324a41 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -83,10 +83,11 @@ out:
return ret;
}
-struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- u64 bytenr, int cow)
+static struct btrfs_csum_item *
+btrfs_lookup_csum(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, int cow)
{
int ret;
struct btrfs_key file_key;
@@ -118,9 +119,11 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
csums_in_item /= csum_size;
- if (csum_offset >= csums_in_item) {
+ if (csum_offset == csums_in_item) {
ret = -EFBIG;
goto fail;
+ } else if (csum_offset > csums_in_item) {
+ goto fail;
}
}
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item);
@@ -150,32 +153,12 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}
-u64 btrfs_file_extent_length(struct btrfs_path *path)
-{
- int extent_type;
- struct btrfs_file_extent_item *fi;
- u64 len;
-
- fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(path->nodes[0], fi);
-
- if (extent_type == BTRFS_FILE_EXTENT_REG ||
- extent_type == BTRFS_FILE_EXTENT_PREALLOC)
- len = btrfs_file_extent_num_bytes(path->nodes[0], fi);
- else if (extent_type == BTRFS_FILE_EXTENT_INLINE)
- len = btrfs_file_extent_inline_len(path->nodes[0], fi);
- else
- BUG();
-
- return len;
-}
-
static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
struct inode *inode, struct bio *bio,
u64 logical_offset, u32 *dst, int dio)
{
- u32 sum;
+ u32 sum[16];
+ int len;
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
u64 offset = 0;
@@ -184,7 +167,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
u64 disk_bytenr;
u32 diff;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- int ret;
+ int count;
struct btrfs_path *path;
struct btrfs_csum_item *item = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -212,10 +195,12 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (dio)
offset = logical_offset;
while (bio_index < bio->bi_vcnt) {
+ len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
- if (ret == 0)
+ count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
+ len);
+ if (count)
goto found;
if (!item || disk_bytenr < item_start_offset ||
@@ -228,10 +213,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
path, disk_bytenr, 0);
if (IS_ERR(item)) {
- ret = PTR_ERR(item);
- if (ret == -ENOENT || ret == -EFBIG)
- ret = 0;
- sum = 0;
+ count = 1;
+ sum[0] = 0;
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
@@ -267,19 +250,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
diff = disk_bytenr - item_start_offset;
diff = diff / root->sectorsize;
diff = diff * csum_size;
-
- read_extent_buffer(path->nodes[0], &sum,
+ count = min_t(int, len, (item_last_offset - disk_bytenr) >>
+ inode->i_sb->s_blocksize_bits);
+ read_extent_buffer(path->nodes[0], sum,
((unsigned long)item) + diff,
- csum_size);
+ csum_size * count);
found:
- if (dst)
- *dst++ = sum;
- else
- set_state_private(io_tree, offset, sum);
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bio_index++;
- bvec++;
+ if (dst) {
+ memcpy(dst, sum, count * csum_size);
+ dst += count;
+ } else {
+ if (dio)
+ extent_cache_csums_dio(io_tree, offset, sum,
+ count);
+ else
+ extent_cache_csums(io_tree, bio, bio_index, sum,
+ count);
+ }
+ while (count--) {
+ disk_bytenr += bvec->bv_len;
+ offset += bvec->bv_len;
+ bio_index++;
+ bvec++;
+ }
}
btrfs_free_path(path);
return 0;
@@ -356,11 +349,8 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
- key.type != BTRFS_EXTENT_CSUM_KEY)
- break;
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.offset > end)
+ key.type != BTRFS_EXTENT_CSUM_KEY ||
+ key.offset > end)
break;
if (key.offset > start)
@@ -482,8 +472,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
data = kmap_atomic(bvec->bv_page);
sector_sum->sum = ~(u32)0;
- sector_sum->sum = btrfs_csum_data(root,
- data + bvec->bv_offset,
+ sector_sum->sum = btrfs_csum_data(data + bvec->bv_offset,
sector_sum->sum,
bvec->bv_len);
kunmap_atomic(data);
@@ -516,8 +505,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
* This calls btrfs_truncate_item with the correct args based on the
* overlap, and fixes up the key as required.
*/
-static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static noinline void truncate_one_csum(struct btrfs_root *root,
struct btrfs_path *path,
struct btrfs_key *key,
u64 bytenr, u64 len)
@@ -542,7 +530,7 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
*/
u32 new_size = (bytenr - key->offset) >> blocksize_bits;
new_size *= csum_size;
- btrfs_truncate_item(trans, root, path, new_size, 1);
+ btrfs_truncate_item(root, path, new_size, 1);
} else if (key->offset >= bytenr && csum_end > end_byte &&
end_byte > key->offset) {
/*
@@ -554,10 +542,10 @@ static noinline void truncate_one_csum(struct btrfs_trans_handle *trans,
u32 new_size = (csum_end - end_byte) >> blocksize_bits;
new_size *= csum_size;
- btrfs_truncate_item(trans, root, path, new_size, 0);
+ btrfs_truncate_item(root, path, new_size, 0);
key->offset = end_byte;
- btrfs_set_item_key_safe(trans, root, path, key);
+ btrfs_set_item_key_safe(root, path, key);
} else {
BUG();
}
@@ -672,7 +660,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
key.offset = end_byte - 1;
} else {
- truncate_one_csum(trans, root, path, &key, bytenr, len);
+ truncate_one_csum(root, path, &key, bytenr, len);
if (key.offset < bytenr)
break;
}
@@ -728,7 +716,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
sector_sum = sums->sums;
- trans->adding_csums = 1;
again:
next_offset = (u64)-1;
found_next = 0;
@@ -834,7 +821,7 @@ again:
diff /= csum_size;
diff *= csum_size;
- btrfs_extend_item(trans, root, path, diff);
+ btrfs_extend_item(root, path, diff);
goto csum;
}
@@ -899,7 +886,6 @@ next_sector:
goto again;
}
out:
- trans->adding_csums = 0;
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index af1d0605a5c1..4205ba752d40 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
@@ -192,8 +193,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
* the same inode in the tree, we will merge them together (by
* __btrfs_add_inode_defrag()) and free the one that we want to requeue.
*/
-void btrfs_requeue_inode_defrag(struct inode *inode,
- struct inode_defrag *defrag)
+static void btrfs_requeue_inode_defrag(struct inode *inode,
+ struct inode_defrag *defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
@@ -473,7 +474,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
/*
* unlocks pages after btrfs_file_write is done with them
*/
-void btrfs_drop_pages(struct page **pages, size_t num_pages)
+static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
size_t i;
for (i = 0; i < num_pages; i++) {
@@ -497,9 +498,9 @@ void btrfs_drop_pages(struct page **pages, size_t num_pages)
* doing real data extents, marking pages dirty and delalloc as required.
*/
int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
- struct page **pages, size_t num_pages,
- loff_t pos, size_t write_bytes,
- struct extent_state **cached)
+ struct page **pages, size_t num_pages,
+ loff_t pos, size_t write_bytes,
+ struct extent_state **cached)
{
int err = 0;
int i;
@@ -552,6 +553,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int testend = 1;
unsigned long flags;
int compressed = 0;
+ bool modified;
WARN_ON(end < start);
if (end == (u64)-1) {
@@ -561,6 +563,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
while (1) {
int no_splits = 0;
+ modified = false;
if (!split)
split = alloc_extent_map();
if (!split2)
@@ -591,6 +594,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ clear_bit(EXTENT_FLAG_LOGGING, &flags);
+ modified = !list_empty(&em->list);
remove_extent_mapping(em_tree, em);
if (no_splits)
goto next;
@@ -606,15 +611,15 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->block_len = em->block_len;
else
split->block_len = split->len;
+ split->ram_bytes = em->ram_bytes;
split->orig_block_len = max(split->block_len,
em->orig_block_len);
split->generation = gen;
split->bdev = em->bdev;
split->flags = flags;
split->compress_type = em->compress_type;
- ret = add_extent_mapping(em_tree, split);
+ ret = add_extent_mapping(em_tree, split, modified);
BUG_ON(ret); /* Logic error */
- list_move(&split->list, &em_tree->modified_extents);
free_extent_map(split);
split = split2;
split2 = NULL;
@@ -631,6 +636,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->generation = gen;
split->orig_block_len = max(em->block_len,
em->orig_block_len);
+ split->ram_bytes = em->ram_bytes;
if (compressed) {
split->block_len = em->block_len;
@@ -642,9 +648,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split->orig_start = em->orig_start;
}
- ret = add_extent_mapping(em_tree, split);
+ ret = add_extent_mapping(em_tree, split, modified);
BUG_ON(ret); /* Logic error */
- list_move(&split->list, &em_tree->modified_extents);
free_extent_map(split);
split = NULL;
}
@@ -820,7 +825,7 @@ next_slot:
memcpy(&new_key, &key, sizeof(new_key));
new_key.offset = end;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
+ btrfs_set_item_key_safe(root, path, &new_key);
extent_offset += end - key.offset;
btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1036,7 +1041,7 @@ again:
ino, bytenr, orig_offset,
&other_start, &other_end)) {
new_key.offset = end;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
+ btrfs_set_item_key_safe(root, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_generation(leaf, fi,
@@ -1070,7 +1075,7 @@ again:
trans->transid);
path->slots[0]++;
new_key.offset = start;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
+ btrfs_set_item_key_safe(root, path, &new_key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -1513,8 +1518,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
size_t count, ocount;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
- sb_start_write(inode->i_sb);
-
mutex_lock(&inode->i_mutex);
err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -1616,7 +1619,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
if (sync)
atomic_dec(&BTRFS_I(inode)->sync_writers);
out:
- sb_end_write(inode->i_sb);
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
@@ -1884,7 +1886,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
path->slots[0]++;
key.offset = offset;
- btrfs_set_item_key_safe(trans, root, path, &key);
+ btrfs_set_item_key_safe(root, path, &key);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -1914,6 +1916,7 @@ out:
} else {
hole_em->start = offset;
hole_em->len = end - offset;
+ hole_em->ram_bytes = hole_em->len;
hole_em->orig_start = offset;
hole_em->block_start = EXTENT_MAP_HOLE;
@@ -1926,10 +1929,7 @@ out:
do {
btrfs_drop_extent_cache(inode, offset, end - 1, 0);
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, hole_em);
- if (!ret)
- list_move(&hole_em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, hole_em, 1);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
free_extent_map(hole_em);
@@ -2141,6 +2141,7 @@ static long btrfs_fallocate(struct file *file, int mode,
{
struct inode *inode = file_inode(file);
struct extent_state *cached_state = NULL;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
@@ -2168,6 +2169,11 @@ static long btrfs_fallocate(struct file *file, int mode,
ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
if (ret)
return ret;
+ if (root->fs_info->quota_enabled) {
+ ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
+ if (ret)
+ goto out_reserve_fail;
+ }
/*
* wait for ordered IO before we have any locks. We'll loop again
@@ -2271,6 +2277,9 @@ static long btrfs_fallocate(struct file *file, int mode,
&cached_state, GFP_NOFS);
out:
mutex_unlock(&inode->i_mutex);
+ if (root->fs_info->quota_enabled)
+ btrfs_qgroup_free(root, alloc_end - alloc_start);
+out_reserve_fail:
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 1f84fc09c1a8..2750b5023526 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -104,7 +104,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
spin_lock(&block_group->lock);
if (!((BTRFS_I(inode)->flags & flags) == flags)) {
- printk(KERN_INFO "Old style space inode found, converting.\n");
+ btrfs_info(root->fs_info,
+ "Old style space inode found, converting.");
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
BTRFS_INODE_NODATACOW;
block_group->disk_cache_state = BTRFS_DC_CLEAR;
@@ -119,9 +120,10 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
return inode;
}
-int __create_free_space_inode(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 ino, u64 offset)
+static int __create_free_space_inode(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ u64 ino, u64 offset)
{
struct btrfs_key key;
struct btrfs_disk_key disk_key;
@@ -195,30 +197,32 @@ int create_free_space_inode(struct btrfs_root *root,
block_group->key.objectid);
}
-int btrfs_truncate_free_space_cache(struct btrfs_root *root,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path,
- struct inode *inode)
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv)
{
- struct btrfs_block_rsv *rsv;
u64 needed_bytes;
- loff_t oldsize;
- int ret = 0;
-
- rsv = trans->block_rsv;
- trans->block_rsv = &root->fs_info->global_block_rsv;
+ int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
btrfs_calc_trans_metadata_size(root, 1);
- spin_lock(&trans->block_rsv->lock);
- if (trans->block_rsv->reserved < needed_bytes) {
- spin_unlock(&trans->block_rsv->lock);
- trans->block_rsv = rsv;
- return -ENOSPC;
- }
- spin_unlock(&trans->block_rsv->lock);
+ spin_lock(&rsv->lock);
+ if (rsv->reserved < needed_bytes)
+ ret = -ENOSPC;
+ else
+ ret = 0;
+ spin_unlock(&rsv->lock);
+ return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct inode *inode)
+{
+ loff_t oldsize;
+ int ret = 0;
oldsize = i_size_read(inode);
btrfs_i_size_write(inode, 0);
@@ -230,9 +234,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
*/
ret = btrfs_truncate_inode_items(trans, root, inode,
0, BTRFS_EXTENT_DATA_KEY);
-
if (ret) {
- trans->block_rsv = rsv;
btrfs_abort_transaction(trans, root, ret);
return ret;
}
@@ -240,7 +242,6 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
ret = btrfs_update_inode(trans, root, inode);
if (ret)
btrfs_abort_transaction(trans, root, ret);
- trans->block_rsv = rsv;
return ret;
}
@@ -431,7 +432,7 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
if (index == 0)
offset = sizeof(u32) * io_ctl->num_pages;
- crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
+ crc = btrfs_csum_data(io_ctl->orig + offset, crc,
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
io_ctl_unmap_page(io_ctl);
@@ -461,7 +462,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
kunmap(io_ctl->pages[0]);
io_ctl_map_page(io_ctl, 0);
- crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
+ crc = btrfs_csum_data(io_ctl->orig + offset, crc,
PAGE_CACHE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
@@ -624,9 +625,9 @@ next:
spin_unlock(&ctl->tree_lock);
}
-int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
- struct btrfs_free_space_ctl *ctl,
- struct btrfs_path *path, u64 offset)
+static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
+ struct btrfs_free_space_ctl *ctl,
+ struct btrfs_path *path, u64 offset)
{
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
@@ -669,10 +670,11 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
btrfs_release_path(path);
if (BTRFS_I(inode)->generation != generation) {
- printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
- " not match free space cache generation (%llu)\n",
- (unsigned long long)BTRFS_I(inode)->generation,
- (unsigned long long)generation);
+ btrfs_err(root->fs_info,
+ "free space inode generation (%llu) "
+ "did not match free space cache generation (%llu)",
+ (unsigned long long)BTRFS_I(inode)->generation,
+ (unsigned long long)generation);
return 0;
}
@@ -721,8 +723,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ret = link_free_space(ctl, e);
spin_unlock(&ctl->tree_lock);
if (ret) {
- printk(KERN_ERR "Duplicate entries in "
- "free space cache, dumping\n");
+ btrfs_err(root->fs_info,
+ "Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
@@ -741,8 +743,8 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
ctl->op->recalc_thresholds(ctl);
spin_unlock(&ctl->tree_lock);
if (ret) {
- printk(KERN_ERR "Duplicate entries in "
- "free space cache, dumping\n");
+ btrfs_err(root->fs_info,
+ "Duplicate entries in free space cache, dumping");
kmem_cache_free(btrfs_free_space_cachep, e);
goto free_cache;
}
@@ -833,8 +835,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
if (!matched) {
__btrfs_remove_free_space_cache(ctl);
- printk(KERN_ERR "block group %llu has an wrong amount of free "
- "space\n", block_group->key.objectid);
+ btrfs_err(fs_info, "block group %llu has wrong amount of free space",
+ block_group->key.objectid);
ret = -1;
}
out:
@@ -845,8 +847,8 @@ out:
spin_unlock(&block_group->lock);
ret = 0;
- printk(KERN_ERR "btrfs: failed to load free space cache "
- "for block group %llu\n", block_group->key.objectid);
+ btrfs_err(fs_info, "failed to load free space cache for block group %llu",
+ block_group->key.objectid);
}
iput(inode);
@@ -866,11 +868,11 @@ out:
* on mount. This will return 0 if it was successfull in writing the cache out,
* and -1 if it was not.
*/
-int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
- struct btrfs_free_space_ctl *ctl,
- struct btrfs_block_group_cache *block_group,
- struct btrfs_trans_handle *trans,
- struct btrfs_path *path, u64 offset)
+static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
+ struct btrfs_free_space_ctl *ctl,
+ struct btrfs_block_group_cache *block_group,
+ struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, u64 offset)
{
struct btrfs_free_space_header *header;
struct extent_buffer *leaf;
@@ -917,10 +919,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
/* Make sure we can fit our crcs into the first page */
if (io_ctl.check_crcs &&
- (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
- WARN_ON(1);
+ (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
goto out_nospc;
- }
io_ctl_set_generation(&io_ctl, trans->transid);
@@ -1104,8 +1104,9 @@ int btrfs_write_out_cache(struct btrfs_root *root,
spin_unlock(&block_group->lock);
ret = 0;
#ifdef DEBUG
- printk(KERN_ERR "btrfs: failed to write free space cache "
- "for block group %llu\n", block_group->key.objectid);
+ btrfs_err(root->fs_info,
+ "failed to write free space cache for block group %llu",
+ block_group->key.objectid);
#endif
}
@@ -1564,7 +1565,8 @@ again:
search_bytes = ctl->unit;
search_bytes = min(search_bytes, end - search_start + 1);
ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
- BUG_ON(ret < 0 || search_start != *offset);
+ if (ret < 0 || search_start != *offset)
+ return -EINVAL;
/* We may have found more bits than what we need */
search_bytes = min(search_bytes, *bytes);
@@ -1970,7 +1972,6 @@ again:
re_search = true;
goto again;
}
- BUG_ON(ret); /* logic error */
out_lock:
spin_unlock(&ctl->tree_lock);
out:
@@ -2064,7 +2065,8 @@ out:
return 0;
}
-void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
+static void __btrfs_remove_free_space_cache_locked(
+ struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *info;
struct rb_node *node;
@@ -2931,8 +2933,9 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
ret = __load_free_space_cache(root, inode, ctl, path, 0);
if (ret < 0)
- printk(KERN_ERR "btrfs: failed to load free ino cache for "
- "root %llu\n", root->root_key.objectid);
+ btrfs_err(fs_info,
+ "failed to load free ino cache for root %llu",
+ root->root_key.objectid);
out_put:
iput(inode);
out:
@@ -2959,11 +2962,531 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
if (ret) {
btrfs_delalloc_release_metadata(inode, inode->i_size);
#ifdef DEBUG
- printk(KERN_ERR "btrfs: failed to write free ino cache "
- "for root %llu\n", root->root_key.objectid);
+ btrfs_err(root->fs_info,
+ "failed to write free ino cache for root %llu",
+ root->root_key.objectid);
#endif
}
iput(inode);
return ret;
}
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+static struct btrfs_block_group_cache *init_test_block_group(void)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = kzalloc(sizeof(*cache), GFP_NOFS);
+ if (!cache)
+ return NULL;
+ cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+ GFP_NOFS);
+ if (!cache->free_space_ctl) {
+ kfree(cache);
+ return NULL;
+ }
+
+ cache->key.objectid = 0;
+ cache->key.offset = 1024 * 1024 * 1024;
+ cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+ cache->sectorsize = 4096;
+
+ spin_lock_init(&cache->lock);
+ INIT_LIST_HEAD(&cache->list);
+ INIT_LIST_HEAD(&cache->cluster_list);
+ INIT_LIST_HEAD(&cache->new_bg_list);
+
+ btrfs_init_free_space_ctl(cache);
+
+ return cache;
+}
+
+/*
+ * Checks to see if the given range is in the free space cache. This is really
+ * just used to check the absence of space, so if there is free space in the
+ * range at all we will return 1.
+ */
+static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
+ u64 bytes)
+{
+ struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+ struct btrfs_free_space *info;
+ int ret = 0;
+
+ spin_lock(&ctl->tree_lock);
+ info = tree_search_offset(ctl, offset, 0, 0);
+ if (!info) {
+ info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+ 1, 0);
+ if (!info)
+ goto out;
+ }
+
+have_info:
+ if (info->bitmap) {
+ u64 bit_off, bit_bytes;
+ struct rb_node *n;
+ struct btrfs_free_space *tmp;
+
+ bit_off = offset;
+ bit_bytes = ctl->unit;
+ ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
+ if (!ret) {
+ if (bit_off == offset) {
+ ret = 1;
+ goto out;
+ } else if (bit_off > offset &&
+ offset + bytes > bit_off) {
+ ret = 1;
+ goto out;
+ }
+ }
+
+ n = rb_prev(&info->offset_index);
+ while (n) {
+ tmp = rb_entry(n, struct btrfs_free_space,
+ offset_index);
+ if (tmp->offset + tmp->bytes < offset)
+ break;
+ if (offset + bytes < tmp->offset) {
+ n = rb_prev(&info->offset_index);
+ continue;
+ }
+ info = tmp;
+ goto have_info;
+ }
+
+ n = rb_next(&info->offset_index);
+ while (n) {
+ tmp = rb_entry(n, struct btrfs_free_space,
+ offset_index);
+ if (offset + bytes < tmp->offset)
+ break;
+ if (tmp->offset + tmp->bytes < offset) {
+ n = rb_next(&info->offset_index);
+ continue;
+ }
+ info = tmp;
+ goto have_info;
+ }
+
+ goto out;
+ }
+
+ if (info->offset == offset) {
+ ret = 1;
+ goto out;
+ }
+
+ if (offset > info->offset && offset < info->offset + info->bytes)
+ ret = 1;
+out:
+ spin_unlock(&ctl->tree_lock);
+ return ret;
+}
+
+/*
+ * Use this if you need to make a bitmap or extent entry specifically, it
+ * doesn't do any of the merging that add_free_space does, this acts a lot like
+ * how the free space cache loading stuff works, so you can get really weird
+ * configurations.
+ */
+static int add_free_space_entry(struct btrfs_block_group_cache *cache,
+ u64 offset, u64 bytes, bool bitmap)
+{
+ struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+ struct btrfs_free_space *info = NULL, *bitmap_info;
+ void *map = NULL;
+ u64 bytes_added;
+ int ret;
+
+again:
+ if (!info) {
+ info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+ if (!info)
+ return -ENOMEM;
+ }
+
+ if (!bitmap) {
+ spin_lock(&ctl->tree_lock);
+ info->offset = offset;
+ info->bytes = bytes;
+ ret = link_free_space(ctl, info);
+ spin_unlock(&ctl->tree_lock);
+ if (ret)
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ return ret;
+ }
+
+ if (!map) {
+ map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ if (!map) {
+ kmem_cache_free(btrfs_free_space_cachep, info);
+ return -ENOMEM;
+ }
+ }
+
+ spin_lock(&ctl->tree_lock);
+ bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+ 1, 0);
+ if (!bitmap_info) {
+ info->bitmap = map;
+ map = NULL;
+ add_new_bitmap(ctl, info, offset);
+ bitmap_info = info;
+ }
+
+ bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+ bytes -= bytes_added;
+ offset += bytes_added;
+ spin_unlock(&ctl->tree_lock);
+
+ if (bytes)
+ goto again;
+
+ if (map)
+ kfree(map);
+ return 0;
+}
+
+/*
+ * This test just does basic sanity checking, making sure we can add an exten
+ * entry and remove space from either end and the middle, and make sure we can
+ * remove space that covers adjacent extent entries.
+ */
+static int test_extents(struct btrfs_block_group_cache *cache)
+{
+ int ret = 0;
+
+ printk(KERN_ERR "Running extent only tests\n");
+
+ /* First just make sure we can remove an entire entry */
+ ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error adding initial extents %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error removing extent %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 0, 4 * 1024 * 1024)) {
+ printk(KERN_ERR "Full remove left some lingering space\n");
+ return -1;
+ }
+
+ /* Ok edge and middle cases now */
+ ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error adding half extent %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error removing tail end %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error removing front end %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
+ if (ret) {
+ printk(KERN_ERR "Error removing middle piece %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 0, 1 * 1024 * 1024)) {
+ printk(KERN_ERR "Still have space at the front\n");
+ return -1;
+ }
+
+ if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
+ printk(KERN_ERR "Still have space in the middle\n");
+ return -1;
+ }
+
+ if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
+ printk(KERN_ERR "Still have space at the end\n");
+ return -1;
+ }
+
+ /* Cleanup */
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ return 0;
+}
+
+static int test_bitmaps(struct btrfs_block_group_cache *cache)
+{
+ u64 next_bitmap_offset;
+ int ret;
+
+ printk(KERN_ERR "Running bitmap only tests\n");
+
+ ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't create a bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error removing bitmap full range %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 0, 4 * 1024 * 1024)) {
+ printk(KERN_ERR "Left some space in bitmap\n");
+ return -1;
+ }
+
+ ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add to our bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Couldn't remove middle chunk %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * The first bitmap we have starts at offset 0 so the next one is just
+ * at the end of the first bitmap.
+ */
+ next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+
+ /* Test a bit straddling two bitmaps */
+ ret = add_free_space_entry(cache, next_bitmap_offset -
+ (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add space that straddles two bitmaps"
+ " %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, next_bitmap_offset -
+ (1 * 1024 * 1024), 2 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
+ 2 * 1024 * 1024)) {
+ printk(KERN_ERR "Left some space when removing overlapping\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ return 0;
+}
+
+/* This is the high grade jackassery */
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+{
+ u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+ int ret;
+
+ printk(KERN_ERR "Running bitmap and extent tests\n");
+
+ /*
+ * First let's do something simple, an extent at the same offset as the
+ * bitmap, but the free space completely in the extent and then
+ * completely in the bitmap.
+ */
+ ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't create bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Couldn't remove extent entry %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 0, 1 * 1024 * 1024)) {
+ printk(KERN_ERR "Left remnants after our remove\n");
+ return -1;
+ }
+
+ /* Now to add back the extent entry and remove from the bitmap */
+ ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+ if (ret) {
+ printk(KERN_ERR "Couldn't re-add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Couldn't remove from bitmap %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
+ printk(KERN_ERR "Left remnants in the bitmap\n");
+ return -1;
+ }
+
+ /*
+ * Ok so a little more evil, extent entry and bitmap at the same offset,
+ * removing an overlapping chunk.
+ */
+ ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add to a bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Couldn't remove overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
+ printk(KERN_ERR "Left over peices after removing "
+ "overlapping\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ /* Now with the extent entry offset into the bitmap */
+ ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add space to the bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add extent to the cache %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Problem removing overlapping space %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
+ printk(KERN_ERR "Left something behind when removing space");
+ return -1;
+ }
+
+ /*
+ * This has blown up in the past, the extent entry starts before the
+ * bitmap entry, but we're trying to remove an offset that falls
+ * completely within the bitmap range and is in both the extent entry
+ * and the bitmap entry, looks like this
+ *
+ * [ extent ]
+ * [ bitmap ]
+ * [ del ]
+ */
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
+ 4 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add bitmap %d\n", ret);
+ return ret;
+ }
+
+ ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
+ 5 * 1024 * 1024, 0);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
+ 5 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Failed to free our space %d\n", ret);
+ return ret;
+ }
+
+ if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
+ 5 * 1024 * 1024)) {
+ printk(KERN_ERR "Left stuff over\n");
+ return -1;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+ /*
+ * This blew up before, we have part of the free space in a bitmap and
+ * then the entirety of the rest of the space in an extent. This used
+ * to return -EAGAIN back from btrfs_remove_extent, make sure this
+ * doesn't happen.
+ */
+ ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add bitmap entry %d\n", ret);
+ return ret;
+ }
+
+ ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
+ if (ret) {
+ printk(KERN_ERR "Couldn't add extent entry %d\n", ret);
+ return ret;
+ }
+
+ ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
+ if (ret) {
+ printk(KERN_ERR "Error removing bitmap and extent "
+ "overlapping %d\n", ret);
+ return ret;
+ }
+
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ return 0;
+}
+
+void btrfs_test_free_space_cache(void)
+{
+ struct btrfs_block_group_cache *cache;
+
+ printk(KERN_ERR "Running btrfs free space cache tests\n");
+
+ cache = init_test_block_group();
+ if (!cache) {
+ printk(KERN_ERR "Couldn't run the tests\n");
+ return;
+ }
+
+ if (test_extents(cache))
+ goto out;
+ if (test_bitmaps(cache))
+ goto out;
+ if (test_bitmaps_and_extents(cache))
+ goto out;
+out:
+ __btrfs_remove_free_space_cache(cache->free_space_ctl);
+ kfree(cache->free_space_ctl);
+ kfree(cache);
+ printk(KERN_ERR "Free space cache tests finished\n");
+}
+#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 8f2613f779ed..8b7f19f44961 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -54,6 +54,8 @@ int create_free_space_inode(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group,
struct btrfs_path *path);
+int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
+ struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_root *root,
struct btrfs_trans_handle *trans,
struct btrfs_path *path,
@@ -110,4 +112,9 @@ int btrfs_return_cluster_to_free_space(
struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen);
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_free_space_cache(void);
+#endif
+
#endif
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c
index 48b8fda93132..e0b7034d6343 100644
--- a/fs/btrfs/inode-item.c
+++ b/fs/btrfs/inode-item.c
@@ -183,10 +183,11 @@ int btrfs_get_inode_ref_index(struct btrfs_trans_handle *trans,
return -ENOENT;
}
-int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- const char *name, int name_len,
- u64 inode_objectid, u64 ref_objectid, u64 *index)
+static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ const char *name, int name_len,
+ u64 inode_objectid, u64 ref_objectid,
+ u64 *index)
{
struct btrfs_path *path;
struct btrfs_key key;
@@ -246,7 +247,7 @@ int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
memmove_extent_buffer(leaf, ptr, ptr + del_len,
item_size - (ptr + del_len - item_start));
- btrfs_truncate_item(trans, root, path, item_size - del_len, 1);
+ btrfs_truncate_item(root, path, item_size - del_len, 1);
out:
btrfs_free_path(path);
@@ -309,7 +310,7 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans,
item_start = btrfs_item_ptr_offset(leaf, path->slots[0]);
memmove_extent_buffer(leaf, ptr, ptr + sub_item_len,
item_size - (ptr + sub_item_len - item_start));
- btrfs_truncate_item(trans, root, path, item_size - sub_item_len, 1);
+ btrfs_truncate_item(root, path, item_size - sub_item_len, 1);
out:
btrfs_free_path(path);
@@ -361,7 +362,7 @@ static int btrfs_insert_inode_extref(struct btrfs_trans_handle *trans,
name, name_len, NULL))
goto out;
- btrfs_extend_item(trans, root, path, ins_len);
+ btrfs_extend_item(root, path, ins_len);
ret = 0;
}
if (ret < 0)
@@ -417,7 +418,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans,
goto out;
old_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
- btrfs_extend_item(trans, root, path, ins_len);
+ btrfs_extend_item(root, path, ins_len);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_ref);
ref = (struct btrfs_inode_ref *)((unsigned long)ref + old_size);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index d26f67a59e36..2c66ddbbe670 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -429,11 +429,12 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
num_bytes = trans->bytes_reserved;
/*
* 1 item for inode item insertion if need
- * 3 items for inode item update (in the worst case)
+ * 4 items for inode item update (in the worst case)
+ * 1 items for slack space if we need do truncation
* 1 item for free space object
* 3 items for pre-allocation
*/
- trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8);
+ trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 10);
ret = btrfs_block_rsv_add(root, trans->block_rsv,
trans->bytes_reserved,
BTRFS_RESERVE_NO_FLUSH);
@@ -468,7 +469,8 @@ again:
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
if (ret) {
- btrfs_abort_transaction(trans, root, ret);
+ if (ret != -ENOSPC)
+ btrfs_abort_transaction(trans, root, ret);
goto out_put;
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c226daefd65d..db57e6384fbb 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
+#include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
@@ -100,7 +101,10 @@ static noinline int cow_file_range(struct inode *inode,
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
- u64 orig_block_len, int type);
+ u64 orig_block_len, u64 ram_bytes,
+ int type);
+
+static int btrfs_dirty_inode(struct inode *inode);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
@@ -353,6 +357,7 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
int compress_type = root->fs_info->compress_type;
+ int redirty = 0;
/* if this is a small write inside eof, kick off a defrag */
if ((end - start + 1) < 16 * 1024 &&
@@ -415,6 +420,17 @@ again:
if (BTRFS_I(inode)->force_compress)
compress_type = BTRFS_I(inode)->force_compress;
+ /*
+ * we need to call clear_page_dirty_for_io on each
+ * page in the range. Otherwise applications with the file
+ * mmap'd can wander in and change the page contents while
+ * we are compressing them.
+ *
+ * If the compression fails for any reason, we set the pages
+ * dirty again later on.
+ */
+ extent_range_clear_dirty_for_io(inode, start, end);
+ redirty = 1;
ret = btrfs_compress_pages(compress_type,
inode->i_mapping, start,
total_compressed, pages,
@@ -554,6 +570,8 @@ cleanup_and_bail_uncompressed:
__set_page_dirty_nobuffers(locked_page);
/* unlocked later on in the async handlers */
}
+ if (redirty)
+ extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_cow, start, end - start + 1,
0, NULL, 0, BTRFS_COMPRESS_NONE);
*num_added += 1;
@@ -697,8 +715,10 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
- if (!em)
+ if (!em) {
+ ret = -ENOMEM;
goto out_free_reserve;
+ }
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -708,6 +728,7 @@ retry:
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->orig_block_len = ins.offset;
+ em->ram_bytes = async_extent->ram_size;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
@@ -716,10 +737,7 @@ retry:
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -907,7 +925,10 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
}
em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
+ if (!em) {
+ ret = -ENOMEM;
+ goto out_reserve;
+ }
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -918,16 +939,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->orig_block_len = ins.offset;
+ em->ram_bytes = ram_size;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -936,11 +955,14 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
btrfs_drop_extent_cache(inode, start,
start + ram_size - 1, 0);
}
+ if (ret)
+ goto out_reserve;
cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_reserve;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
@@ -948,7 +970,7 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
cur_alloc_size);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
+ goto out_reserve;
}
}
@@ -977,6 +999,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
out:
return ret;
+out_reserve:
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_unlock:
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
@@ -1180,6 +1204,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 disk_bytenr;
u64 num_bytes;
u64 disk_num_bytes;
+ u64 ram_bytes;
int extent_type;
int ret, err;
int type;
@@ -1276,6 +1301,7 @@ next_slot:
struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(leaf, fi);
+ ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -1359,6 +1385,7 @@ out_check:
em->block_len = num_bytes;
em->block_start = disk_bytenr;
em->orig_block_len = disk_num_bytes;
+ em->ram_bytes = ram_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->mod_start = em->start;
em->mod_len = em->len;
@@ -1367,10 +1394,7 @@ out_check:
em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -1511,7 +1535,7 @@ static void btrfs_merge_extent_hook(struct inode *inode,
* have pending delalloc work to be done.
*/
static void btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+ struct extent_state *state, unsigned long *bits)
{
/*
@@ -1555,7 +1579,8 @@ static void btrfs_set_bit_hook(struct inode *inode,
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
static void btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+ struct extent_state *state,
+ unsigned long *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -1743,8 +1768,10 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum *sum;
list_for_each_entry(sum, list, list) {
+ trans->adding_csums = 1;
btrfs_csum_file_blocks(trans,
BTRFS_I(inode)->root->fs_info->csum_root, sum);
+ trans->adding_csums = 0;
}
return 0;
}
@@ -2312,6 +2339,7 @@ again:
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = start;
+ path->leave_spinning = 1;
if (merge) {
struct btrfs_file_extent_item *fi;
u64 extent_len;
@@ -2368,6 +2396,7 @@ again:
btrfs_mark_buffer_dirty(leaf);
inode_add_bytes(inode, len);
+ btrfs_release_path(path);
ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
new->disk_len, 0,
@@ -2381,6 +2410,7 @@ again:
ret = 1;
out_free_path:
btrfs_release_path(path);
+ path->leave_spinning = 0;
btrfs_end_transaction(trans, root);
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
@@ -2774,6 +2804,8 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
int ret;
struct btrfs_root *root = BTRFS_I(inode)->root;
u32 csum = ~(u32)0;
+ static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
if (PageChecked(page)) {
ClearPageChecked(page);
@@ -2800,7 +2832,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (ret)
goto zeroit;
- csum = btrfs_csum_data(root, kaddr + offset, csum, end - start + 1);
+ csum = btrfs_csum_data(kaddr + offset, csum, end - start + 1);
btrfs_csum_final(csum, (char *)&csum);
if (csum != private)
goto zeroit;
@@ -2810,11 +2842,11 @@ good:
return 0;
zeroit:
- printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
- "private %llu\n",
- (unsigned long long)btrfs_ino(page->mapping->host),
- (unsigned long long)start, csum,
- (unsigned long long)private);
+ if (__ratelimit(&_rs))
+ btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
+ (unsigned long long)btrfs_ino(page->mapping->host),
+ (unsigned long long)start, csum,
+ (unsigned long long)private);
memset(kaddr + offset, 1, end - start + 1);
flush_dcache_page(page);
kunmap_atomic(kaddr);
@@ -3000,7 +3032,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
* We have done the truncate/delete so we can go ahead and remove the orphan
* item for this particular inode.
*/
-int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
+static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
+ struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
int delete_item = 0;
@@ -3095,8 +3128,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*/
if (found_key.offset == last_objectid) {
- printk(KERN_ERR "btrfs: Error removing orphan entry, "
- "stopping orphan cleanup\n");
+ btrfs_err(root->fs_info,
+ "Error removing orphan entry, stopping orphan cleanup");
ret = -EINVAL;
goto out;
}
@@ -3153,8 +3186,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = PTR_ERR(trans);
goto out;
}
- printk(KERN_ERR "auto deleting %Lu\n",
- found_key.objectid);
+ btrfs_debug(root->fs_info, "auto deleting %Lu",
+ found_key.objectid);
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -3218,13 +3251,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
}
if (nr_unlink)
- printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
+ btrfs_debug(root->fs_info, "unlinked %d orphans", nr_unlink);
if (nr_truncate)
- printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+ btrfs_debug(root->fs_info, "truncated %d orphans", nr_truncate);
out:
if (ret)
- printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+ btrfs_crit(root->fs_info,
+ "could not do orphan cleanup %d", ret);
btrfs_free_path(path);
return ret;
}
@@ -3572,9 +3606,10 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
dir_ino, &index);
if (ret) {
- printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
- "inode %llu parent %llu\n", name_len, name,
- (unsigned long long)ino, (unsigned long long)dir_ino);
+ btrfs_info(root->fs_info,
+ "failed to delete reference to %.*s, inode %llu parent %llu",
+ name_len, name,
+ (unsigned long long)ino, (unsigned long long)dir_ino);
btrfs_abort_transaction(trans, root, ret);
goto err;
}
@@ -3596,6 +3631,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
dir, index);
if (ret == -ENOENT)
ret = 0;
+ else if (ret)
+ btrfs_abort_transaction(trans, root, ret);
err:
btrfs_free_path(path);
if (ret)
@@ -3641,7 +3678,7 @@ static int check_path_shared(struct btrfs_root *root,
eb = path->nodes[level];
if (!btrfs_block_can_be_shared(root, eb))
continue;
- ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len,
+ ret = btrfs_lookup_extent_info(NULL, root, eb->start, level, 1,
&refs, NULL);
if (refs > 1)
return 1;
@@ -3676,11 +3713,9 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
* 1 for the dir item
* 1 for the dir index
* 1 for the inode ref
- * 1 for the inode ref in the tree log
- * 2 for the dir entries in the log
* 1 for the inode
*/
- trans = btrfs_start_transaction(root, 8);
+ trans = btrfs_start_transaction(root, 5);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
@@ -4158,8 +4193,7 @@ search_again:
}
size =
btrfs_file_extent_calc_inline_size(size);
- btrfs_truncate_item(trans, root, path,
- size, 1);
+ btrfs_truncate_item(root, path, size, 1);
} else if (root->ref_cows) {
inode_sub_bytes(inode, item_end + 1 -
found_key.offset);
@@ -4433,16 +4467,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
+ hole_em->ram_bytes = hole_size;
hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
while (1) {
write_lock(&em_tree->lock);
- err = add_extent_mapping(em_tree, hole_em);
- if (!err)
- list_move(&hole_em->list,
- &em_tree->modified_extents);
+ err = add_extent_mapping(em_tree, hole_em, 1);
write_unlock(&em_tree->lock);
if (err != -EEXIST)
break;
@@ -4653,8 +4685,9 @@ void btrfs_evict_inode(struct inode *inode)
ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
if (ret) {
- printk(KERN_WARNING "Could not get space for a "
- "delete, will truncate on mount %d\n", ret);
+ btrfs_warn(root->fs_info,
+ "Could not get space for a delete, will truncate on mount %d",
+ ret);
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
goto no_delete;
@@ -4695,6 +4728,7 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root);
no_delete:
+ btrfs_remove_delayed_node(inode);
clear_inode(inode);
return;
}
@@ -4810,14 +4844,13 @@ static void inode_tree_add(struct inode *inode)
struct rb_node **p;
struct rb_node *parent;
u64 ino = btrfs_ino(inode);
-again:
- p = &root->inode_tree.rb_node;
- parent = NULL;
if (inode_unhashed(inode))
return;
-
+again:
+ parent = NULL;
spin_lock(&root->inode_lock);
+ p = &root->inode_tree.rb_node;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct btrfs_inode, rb_node);
@@ -5318,7 +5351,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
* FIXME, needs more benchmarking...there are no reasons other than performance
* to keep or drop this code.
*/
-int btrfs_dirty_inode(struct inode *inode)
+static int btrfs_dirty_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
@@ -5960,7 +5993,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree,
em->block_start += start_diff;
em->block_len -= start_diff;
}
- return add_extent_mapping(em_tree, em);
+ return add_extent_mapping(em_tree, em, 0);
}
static noinline int uncompress_inline(struct btrfs_path *path,
@@ -6134,6 +6167,7 @@ again:
goto not_found_em;
}
+ em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
@@ -6242,18 +6276,18 @@ not_found_em:
insert:
btrfs_release_path(path);
if (em->start > start || extent_map_end(em) <= start) {
- printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
- "[%llu %llu]\n", (unsigned long long)em->start,
- (unsigned long long)em->len,
- (unsigned long long)start,
- (unsigned long long)len);
+ btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
+ (unsigned long long)em->start,
+ (unsigned long long)em->len,
+ (unsigned long long)start,
+ (unsigned long long)len);
err = -EIO;
goto out;
}
err = 0;
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
@@ -6465,7 +6499,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
}
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
- ins.offset, ins.offset, 0);
+ ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em))
goto out;
@@ -6485,7 +6519,9 @@ out:
* block must be cow'd
*/
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 offset, u64 len)
+ struct inode *inode, u64 offset, u64 *len,
+ u64 *orig_start, u64 *orig_block_len,
+ u64 *ram_bytes)
{
struct btrfs_path *path;
int ret;
@@ -6542,8 +6578,12 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
backref_offset = btrfs_file_extent_offset(leaf, fi);
+ *orig_start = key.offset - backref_offset;
+ *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ *ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
- if (extent_end < offset + len) {
+ if (extent_end < offset + *len) {
/* extent doesn't include our full range, must cow */
goto out;
}
@@ -6567,13 +6607,14 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
*/
disk_bytenr += backref_offset;
disk_bytenr += offset - key.offset;
- num_bytes = min(offset + len, extent_end) - offset;
+ num_bytes = min(offset + *len, extent_end) - offset;
if (csum_exist_in_range(root, disk_bytenr, num_bytes))
goto out;
/*
* all of the above have passed, it is safe to overwrite this extent
* without cow
*/
+ *len = num_bytes;
ret = 1;
out:
btrfs_free_path(path);
@@ -6644,7 +6685,8 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
- u64 orig_block_len, int type)
+ u64 orig_block_len, u64 ram_bytes,
+ int type)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
@@ -6665,6 +6707,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->block_start = block_start;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->orig_block_len = orig_block_len;
+ em->ram_bytes = ram_bytes;
em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
if (type == BTRFS_ORDERED_PREALLOC)
@@ -6674,10 +6717,7 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
btrfs_drop_extent_cache(inode, em->start,
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
@@ -6772,7 +6812,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
em->block_start != EXTENT_MAP_HOLE)) {
int type;
int ret;
- u64 block_start;
+ u64 block_start, orig_start, orig_block_len, ram_bytes;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
type = BTRFS_ORDERED_PREALLOC;
@@ -6790,16 +6830,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (IS_ERR(trans))
goto must_cow;
- if (can_nocow_odirect(trans, inode, start, len) == 1) {
- u64 orig_start = em->orig_start;
- u64 orig_block_len = em->orig_block_len;
-
+ if (can_nocow_odirect(trans, inode, start, &len, &orig_start,
+ &orig_block_len, &ram_bytes) == 1) {
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
em = create_pinned_em(inode, start, len,
orig_start,
block_start, len,
- orig_block_len, type);
+ orig_block_len,
+ ram_bytes, type);
if (IS_ERR(em)) {
btrfs_end_transaction(trans, root);
goto unlock_err;
@@ -6893,7 +6932,11 @@ struct btrfs_dio_private {
/* IO errors */
int errors;
+ /* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;
+
+ /* dio_bio came from fs/direct-io.c */
+ struct bio *dio_bio;
};
static void btrfs_endio_direct_read(struct bio *bio, int err)
@@ -6903,6 +6946,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct bio_vec *bvec = bio->bi_io_vec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct bio *dio_bio;
u64 start;
start = dip->logical_offset;
@@ -6919,7 +6963,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
goto failed;
local_irq_save(flags);
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
+ csum = btrfs_csum_data(kaddr + bvec->bv_offset,
csum, bvec->bv_len);
btrfs_csum_final(csum, (char *)&csum);
kunmap_atomic(kaddr);
@@ -6928,11 +6972,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
flush_dcache_page(bvec->bv_page);
if (csum != private) {
failed:
- printk(KERN_ERR "btrfs csum failed ino %llu off"
- " %llu csum %u private %u\n",
- (unsigned long long)btrfs_ino(inode),
- (unsigned long long)start,
- csum, (unsigned)private);
+ btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
+ (unsigned long long)btrfs_ino(inode),
+ (unsigned long long)start,
+ csum, (unsigned)private);
err = -EIO;
}
}
@@ -6943,14 +6986,15 @@ failed:
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had a csum failure make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static void btrfs_endio_direct_write(struct bio *bio, int err)
@@ -6961,6 +7005,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_ordered_extent *ordered = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
+ struct bio *dio_bio;
int ret;
if (err)
@@ -6988,14 +7033,15 @@ out_test:
goto again;
}
out_done:
- bio->bi_private = dip->private;
+ dio_bio = dip->dio_bio;
kfree(dip);
/* If we had an error make sure to clear the uptodate flag */
if (err)
- clear_bit(BIO_UPTODATE, &bio->bi_flags);
- dio_end_io(bio, err);
+ clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
+ dio_end_io(dio_bio, err);
+ bio_put(bio);
}
static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
@@ -7031,10 +7077,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
if (!atomic_dec_and_test(&dip->pending_bios))
goto out;
- if (dip->errors)
+ if (dip->errors) {
bio_io_error(dip->orig_bio);
- else {
- set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+ } else {
+ set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
bio_endio(dip->orig_bio, 0);
}
out:
@@ -7209,25 +7255,34 @@ out_err:
return 0;
}
-static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
- loff_t file_offset)
+static void btrfs_submit_direct(int rw, struct bio *dio_bio,
+ struct inode *inode, loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec = dio_bio->bi_io_vec;
+ struct bio *io_bio;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
+ io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
+
+ if (!io_bio) {
+ ret = -ENOMEM;
+ goto free_ordered;
+ }
+
dip = kmalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
- goto free_ordered;
+ goto free_io_bio;
}
- dip->private = bio->bi_private;
+ dip->private = dio_bio->bi_private;
+ io_bio->bi_private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;
@@ -7235,22 +7290,27 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
do {
dip->bytes += bvec->bv_len;
bvec++;
- } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
+ } while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));
- dip->disk_bytenr = (u64)bio->bi_sector << 9;
- bio->bi_private = dip;
+ dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+ io_bio->bi_private = dip;
dip->errors = 0;
- dip->orig_bio = bio;
+ dip->orig_bio = io_bio;
+ dip->dio_bio = dio_bio;
atomic_set(&dip->pending_bios, 0);
if (write)
- bio->bi_end_io = btrfs_endio_direct_write;
+ io_bio->bi_end_io = btrfs_endio_direct_write;
else
- bio->bi_end_io = btrfs_endio_direct_read;
+ io_bio->bi_end_io = btrfs_endio_direct_read;
ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;
+
+free_io_bio:
+ bio_put(io_bio);
+
free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
@@ -7266,7 +7326,7 @@ free_ordered:
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
- bio_endio(bio, ret);
+ bio_endio(dio_bio, ret);
}
static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
@@ -7408,8 +7468,8 @@ static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
}
-int btrfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+static int btrfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
struct extent_io_tree *tree;
@@ -7450,7 +7510,8 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
return __btrfs_releasepage(page, gfp_flags & GFP_NOFS);
}
-static void btrfs_invalidatepage(struct page *page, unsigned long offset)
+static void btrfs_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct inode *inode = page->mapping->host;
struct extent_io_tree *tree;
@@ -7924,8 +7985,8 @@ void btrfs_destroy_inode(struct inode *inode)
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
- printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
- (unsigned long long)btrfs_ino(inode));
+ btrfs_info(root->fs_info, "inode %llu still on the orphan list",
+ (unsigned long long)btrfs_ino(inode));
atomic_dec(&root->orphan_inodes);
}
@@ -7934,10 +7995,9 @@ void btrfs_destroy_inode(struct inode *inode)
if (!ordered)
break;
else {
- printk(KERN_ERR "btrfs found ordered "
- "extent %llu %llu on inode cleanup\n",
- (unsigned long long)ordered->file_offset,
- (unsigned long long)ordered->len);
+ btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
+ (unsigned long long)ordered->file_offset,
+ (unsigned long long)ordered->len);
btrfs_remove_ordered_extent(inode, ordered);
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
@@ -7946,7 +8006,6 @@ void btrfs_destroy_inode(struct inode *inode)
inode_tree_del(inode);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
free:
- btrfs_remove_delayed_node(inode);
call_rcu(&inode->i_rcu, btrfs_i_callback);
}
@@ -8124,7 +8183,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items
* should cover the worst case number of items we'll modify.
*/
- trans = btrfs_start_transaction(root, 20);
+ trans = btrfs_start_transaction(root, 11);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_notrans;
@@ -8502,6 +8561,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
struct btrfs_key ins;
u64 cur_offset = start;
u64 i_size;
+ u64 cur_bytes;
int ret = 0;
bool own_trans = true;
@@ -8516,8 +8576,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
- ret = btrfs_reserve_extent(trans, root,
- min(num_bytes, 256ULL * 1024 * 1024),
+ cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
+ cur_bytes = max(cur_bytes, min_size);
+ ret = btrfs_reserve_extent(trans, root, cur_bytes,
min_size, 0, *alloc_hint, &ins, 1);
if (ret) {
if (own_trans)
@@ -8552,16 +8613,14 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->orig_block_len = ins.offset;
+ em->ram_bytes = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
em->generation = trans->transid;
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- if (!ret)
- list_move(&em->list,
- &em_tree->modified_extents);
+ ret = add_extent_mapping(em_tree, em, 1);
write_unlock(&em_tree->lock);
if (ret != -EEXIST)
break;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c83086fdda05..0f81d67cdc8d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -527,6 +527,8 @@ fail:
if (async_transid) {
*async_transid = trans->transid;
err = btrfs_commit_transaction_async(trans, root, 1);
+ if (err)
+ err = btrfs_commit_transaction(trans, root);
} else {
err = btrfs_commit_transaction(trans, root);
}
@@ -592,16 +594,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
*async_transid = trans->transid;
ret = btrfs_commit_transaction_async(trans,
root->fs_info->extent_root, 1);
+ if (ret)
+ ret = btrfs_commit_transaction(trans, root);
} else {
ret = btrfs_commit_transaction(trans,
root->fs_info->extent_root);
}
- if (ret) {
- /* cleanup_transaction has freed this for us */
- if (trans->aborted)
- pending_snapshot = NULL;
+ if (ret)
goto fail;
- }
ret = pending_snapshot->error;
if (ret)
@@ -723,7 +723,9 @@ static noinline int btrfs_mksubvol(struct path *parent,
struct dentry *dentry;
int error;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ if (error == -EINTR)
+ return error;
dentry = lookup_one_len(name, parent->dentry, namelen);
error = PTR_ERR(dentry);
@@ -1152,8 +1154,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
u64 new_align = ~((u64)128 * 1024 - 1);
struct page **pages = NULL;
- if (extent_thresh == 0)
- extent_thresh = 256 * 1024;
+ if (isize == 0)
+ return 0;
+
+ if (range->start >= isize)
+ return -EINVAL;
if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
if (range->compress_type > BTRFS_COMPRESS_TYPES)
@@ -1162,8 +1167,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
compress_type = range->compress_type;
}
- if (isize == 0)
- return 0;
+ if (extent_thresh == 0)
+ extent_thresh = 256 * 1024;
/*
* if we were not given a file, allocate a readahead
@@ -1796,7 +1801,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
item_off = btrfs_item_ptr_offset(leaf, i);
item_len = btrfs_item_size_nr(leaf, i);
- if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+ btrfs_item_key_to_cpu(leaf, key, i);
+ if (!key_in_sk(key, sk))
+ continue;
+
+ if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
item_len = 0;
if (sizeof(sh) + item_len + *sk_offset >
@@ -1805,10 +1814,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
goto overflow;
}
- btrfs_item_key_to_cpu(leaf, key, i);
- if (!key_in_sk(key, sk))
- continue;
-
sh.objectid = key->objectid;
sh.offset = key->offset;
sh.type = key->type;
@@ -2086,7 +2091,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
if (err)
goto out;
- mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ if (err == -EINTR)
+ goto out;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2245,13 +2252,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
if (ret)
return ret;
- if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
- 1)) {
- pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
- mnt_drop_write_file(file);
- return -EINVAL;
- }
-
if (btrfs_root_readonly(root)) {
ret = -EROFS;
goto out;
@@ -2306,7 +2306,6 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
ret = -EINVAL;
}
out:
- atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
mnt_drop_write_file(file);
return ret;
}
@@ -2433,7 +2432,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
mutex_lock(&fs_devices->device_list_mutex);
dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL);
- mutex_unlock(&fs_devices->device_list_mutex);
if (!dev) {
ret = -ENODEV;
@@ -2457,6 +2455,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
}
out:
+ mutex_unlock(&fs_devices->device_list_mutex);
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
ret = -EFAULT;
@@ -3011,7 +3010,7 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
}
}
-long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
+static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
{
struct btrfs_ioctl_space_args space_args;
struct btrfs_ioctl_space_info space;
@@ -3701,12 +3700,11 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
goto drop_write;
}
- if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- goto out;
- }
+ down_write(&root->fs_info->subvol_sem);
+ trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
}
switch (sa->cmd) {
@@ -3716,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
case BTRFS_QUOTA_CTL_DISABLE:
ret = btrfs_quota_disable(trans, root->fs_info);
break;
- case BTRFS_QUOTA_CTL_RESCAN:
- ret = btrfs_quota_rescan(root->fs_info);
- break;
default:
ret = -EINVAL;
break;
@@ -3727,13 +3722,12 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
- if (trans) {
- err = btrfs_commit_transaction(trans, root);
- if (err && !ret)
- ret = err;
- }
+ err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
+ if (err && !ret)
+ ret = err;
out:
kfree(sa);
+ up_write(&root->fs_info->subvol_sem);
drop_write:
mnt_drop_write_file(file);
return ret;
@@ -3885,6 +3879,64 @@ drop_write:
return ret;
}
+static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
+{
+ struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_ioctl_quota_rescan_args *qsa;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
+ qsa = memdup_user(arg, sizeof(*qsa));
+ if (IS_ERR(qsa)) {
+ ret = PTR_ERR(qsa);
+ goto drop_write;
+ }
+
+ if (qsa->flags) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = btrfs_qgroup_rescan(root->fs_info);
+
+out:
+ kfree(qsa);
+drop_write:
+ mnt_drop_write_file(file);
+ return ret;
+}
+
+static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
+{
+ struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+ struct btrfs_ioctl_quota_rescan_args *qsa;
+ int ret = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+ if (!qsa)
+ return -ENOMEM;
+
+ if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ qsa->flags = 1;
+ qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
+ }
+
+ if (copy_to_user(arg, qsa, sizeof(*qsa)))
+ ret = -EFAULT;
+
+ kfree(qsa);
+ return ret;
+}
+
static long btrfs_ioctl_set_received_subvol(struct file *file,
void __user *arg)
{
@@ -4123,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_qgroup_create(file, argp);
case BTRFS_IOC_QGROUP_LIMIT:
return btrfs_ioctl_qgroup_limit(file, argp);
+ case BTRFS_IOC_QUOTA_RESCAN:
+ return btrfs_ioctl_quota_rescan(file, argp);
+ case BTRFS_IOC_QUOTA_RESCAN_STATUS:
+ return btrfs_ioctl_quota_rescan_status(file, argp);
case BTRFS_IOC_DEV_REPLACE:
return btrfs_ioctl_dev_replace(root, argp);
case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index e95df435d897..01277b8f2373 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -24,7 +24,7 @@
#include "extent_io.h"
#include "locking.h"
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb);
/*
* if we currently have a spinning reader or writer lock
@@ -264,7 +264,7 @@ void btrfs_assert_tree_locked(struct extent_buffer *eb)
BUG_ON(!atomic_read(&eb->write_locks));
}
-void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
+static void btrfs_assert_tree_read_locked(struct extent_buffer *eb)
{
BUG_ON(!atomic_read(&eb->read_locks));
}
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index ca52681e5f40..b81e0e9a4894 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -26,7 +26,6 @@
void btrfs_tree_lock(struct extent_buffer *eb);
void btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_try_spin_lock(struct extent_buffer *eb);
void btrfs_tree_read_lock(struct extent_buffer *eb);
void btrfs_tree_read_unlock(struct extent_buffer *eb);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index dc08d77b717e..1ddd728541ee 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -557,6 +557,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
INIT_LIST_HEAD(&splice);
INIT_LIST_HEAD(&works);
+ mutex_lock(&root->fs_info->ordered_operations_mutex);
spin_lock(&root->fs_info->ordered_extent_lock);
list_splice_init(&root->fs_info->ordered_extents, &splice);
while (!list_empty(&splice)) {
@@ -600,6 +601,7 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
cond_resched();
}
+ mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
/*
@@ -984,7 +986,7 @@ out:
* be reclaimed before their checksum is actually put into the btree
*/
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
- u32 *sum)
+ u32 *sum, int len)
{
struct btrfs_ordered_sum *ordered_sum;
struct btrfs_sector_sum *sector_sums;
@@ -993,22 +995,28 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
unsigned long num_sectors;
unsigned long i;
u32 sectorsize = BTRFS_I(inode)->root->sectorsize;
- int ret = 1;
+ int index = 0;
ordered = btrfs_lookup_ordered_extent(inode, offset);
if (!ordered)
- return 1;
+ return 0;
spin_lock_irq(&tree->lock);
list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
- if (disk_bytenr >= ordered_sum->bytenr) {
- num_sectors = ordered_sum->len / sectorsize;
- sector_sums = ordered_sum->sums;
- for (i = 0; i < num_sectors; i++) {
+ if (disk_bytenr >= ordered_sum->bytenr &&
+ disk_bytenr < ordered_sum->bytenr + ordered_sum->len) {
+ i = (disk_bytenr - ordered_sum->bytenr) >>
+ inode->i_sb->s_blocksize_bits;
+ sector_sums = ordered_sum->sums + i;
+ num_sectors = ordered_sum->len >>
+ inode->i_sb->s_blocksize_bits;
+ for (; i < num_sectors; i++) {
if (sector_sums[i].bytenr == disk_bytenr) {
- *sum = sector_sums[i].sum;
- ret = 0;
- goto out;
+ sum[index] = sector_sums[i].sum;
+ index++;
+ if (index == len)
+ goto out;
+ disk_bytenr += sectorsize;
}
}
}
@@ -1016,7 +1024,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
out:
spin_unlock_irq(&tree->lock);
btrfs_put_ordered_extent(ordered);
- return ret;
+ return index;
}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8eadfe406cdd..58b0e3b0ebad 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -196,7 +196,8 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
-int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
+int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
+ u32 *sum, int len);
int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int wait);
void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 920957ecb27e..dc0024f17c1f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -176,7 +176,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
nr = btrfs_header_nritems(l);
- printk(KERN_INFO "leaf %llu total ptrs %d free space %d\n",
+ btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
(unsigned long long)btrfs_header_bytenr(l), nr,
btrfs_leaf_free_space(root, l));
for (i = 0 ; i < nr ; i++) {
@@ -319,10 +319,9 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
btrfs_print_leaf(root, c);
return;
}
- printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
- (unsigned long long)btrfs_header_bytenr(c),
- level, nr,
- (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
+ btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
+ (unsigned long long)btrfs_header_bytenr(c),
+ level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
diff --git a/fs/btrfs/print-tree.h b/fs/btrfs/print-tree.h
index da75efe534d5..7faddfacc5bd 100644
--- a/fs/btrfs/print-tree.h
+++ b/fs/btrfs/print-tree.h
@@ -19,5 +19,5 @@
#ifndef __PRINT_TREE_
#define __PRINT_TREE_
void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l);
-void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *t);
+void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c);
#endif
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aee4b1cc3d98..9d49c586995a 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
#include "locking.h"
#include "ulist.h"
#include "backref.h"
+#include "extent_io.h"
/* TODO XXX FIXME
* - subvol delete -> delete when ref goes to 0? delete limits also?
* - reorganize keys
* - compressed
* - sync
- * - rescan
* - copy also limits on subvol creation
* - limit
* - caches fuer ulists
@@ -98,7 +98,15 @@ struct btrfs_qgroup_list {
struct btrfs_qgroup *member;
};
-/* must be called with qgroup_lock held */
+struct qgroup_rescan {
+ struct btrfs_work work;
+ struct btrfs_fs_info *fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+ struct qgroup_rescan *qscan);
+
+/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
u64 qgroupid)
{
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
ptr);
- /* FIXME read scan element */
+ fs_info->qgroup_rescan_progress.objectid =
+ btrfs_qgroup_status_rescan(l, ptr);
+ if (fs_info->qgroup_flags &
+ BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ struct qgroup_rescan *qscan =
+ kmalloc(sizeof(*qscan), GFP_NOFS);
+ if (!qscan) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fs_info->qgroup_rescan_progress.type = 0;
+ fs_info->qgroup_rescan_progress.offset = 0;
+ qgroup_rescan_start(fs_info, qscan);
+ }
goto next1;
}
@@ -420,8 +441,6 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
qgroup = rb_entry(n, struct btrfs_qgroup, node);
rb_erase(n, &fs_info->qgroup_tree);
- WARN_ON(!list_empty(&qgroup->dirty));
-
while (!list_empty(&qgroup->groups)) {
list = list_first_entry(&qgroup->groups,
struct btrfs_qgroup_list,
@@ -721,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
- /* XXX scan */
+ btrfs_set_qgroup_status_rescan(l, ptr,
+ fs_info->qgroup_rescan_progress.objectid);
btrfs_mark_buffer_dirty(l);
@@ -783,19 +803,21 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *quota_root;
+ struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_path *path = NULL;
struct btrfs_qgroup_status_item *ptr;
struct extent_buffer *leaf;
struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_qgroup *qgroup = NULL;
int ret = 0;
+ int slot;
- spin_lock(&fs_info->qgroup_lock);
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (fs_info->quota_root) {
fs_info->pending_quota_state = 1;
- spin_unlock(&fs_info->qgroup_lock);
goto out;
}
- spin_unlock(&fs_info->qgroup_lock);
/*
* initially create the quota tree
@@ -830,10 +852,57 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
- btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+ btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
btrfs_mark_buffer_dirty(leaf);
+ key.objectid = 0;
+ key.type = BTRFS_ROOT_REF_KEY;
+ key.offset = 0;
+
+ btrfs_release_path(path);
+ ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
+ if (ret > 0)
+ goto out_add_root;
+ if (ret < 0)
+ goto out_free_path;
+
+
+ while (1) {
+ slot = path->slots[0];
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+
+ if (found_key.type == BTRFS_ROOT_REF_KEY) {
+ ret = add_qgroup_item(trans, quota_root,
+ found_key.offset);
+ if (ret)
+ goto out_free_path;
+
+ qgroup = add_qgroup_rb(fs_info, found_key.offset);
+ if (IS_ERR(qgroup)) {
+ ret = PTR_ERR(qgroup);
+ goto out_free_path;
+ }
+ }
+ ret = btrfs_next_item(tree_root, path);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret)
+ break;
+ }
+
+out_add_root:
+ btrfs_release_path(path);
+ ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
+ if (ret)
+ goto out_free_path;
+
+ qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
+ if (IS_ERR(qgroup)) {
+ ret = PTR_ERR(qgroup);
+ goto out_free_path;
+ }
spin_lock(&fs_info->qgroup_lock);
fs_info->quota_root = quota_root;
fs_info->pending_quota_state = 1;
@@ -847,6 +916,7 @@ out_free_root:
kfree(quota_root);
}
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -857,11 +927,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ if (!fs_info->quota_root)
+ goto out;
spin_lock(&fs_info->qgroup_lock);
- if (!fs_info->quota_root) {
- spin_unlock(&fs_info->qgroup_lock);
- return 0;
- }
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
quota_root = fs_info->quota_root;
@@ -869,8 +938,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
btrfs_free_qgroup_config(fs_info);
spin_unlock(&fs_info->qgroup_lock);
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret)
@@ -891,39 +962,62 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
free_extent_buffer(quota_root->commit_root);
kfree(quota_root);
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+ struct btrfs_qgroup *qgroup)
{
- /* FIXME */
- return 0;
+ if (list_empty(&qgroup->dirty))
+ list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup_list *list;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ member = find_qgroup_rb(fs_info, src);
+ parent = find_qgroup_rb(fs_info, dst);
+ if (!member || !parent) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* check if such qgroup relation exist firstly */
+ list_for_each_entry(list, &member->groups, next_group) {
+ if (list->group == parent) {
+ ret = -EEXIST;
+ goto out;
+ }
+ }
ret = add_qgroup_relation_item(trans, quota_root, src, dst);
if (ret)
- return ret;
+ goto out;
ret = add_qgroup_relation_item(trans, quota_root, dst, src);
if (ret) {
del_qgroup_relation_item(trans, quota_root, src, dst);
- return ret;
+ goto out;
}
spin_lock(&fs_info->qgroup_lock);
ret = add_relation_rb(quota_root->fs_info, src, dst);
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -931,13 +1025,34 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
struct btrfs_root *quota_root;
+ struct btrfs_qgroup *parent;
+ struct btrfs_qgroup *member;
+ struct btrfs_qgroup_list *list;
int ret = 0;
int err;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ member = find_qgroup_rb(fs_info, src);
+ parent = find_qgroup_rb(fs_info, dst);
+ if (!member || !parent) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* check if such qgroup relation exist firstly */
+ list_for_each_entry(list, &member->groups, next_group) {
+ if (list->group == parent)
+ goto exist;
+ }
+ ret = -ENOENT;
+ goto out;
+exist:
ret = del_qgroup_relation_item(trans, quota_root, src, dst);
err = del_qgroup_relation_item(trans, quota_root, dst, src);
if (err && !ret)
@@ -945,9 +1060,9 @@ int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
spin_lock(&fs_info->qgroup_lock);
del_relation_rb(fs_info, src, dst);
-
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -958,11 +1073,21 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *qgroup;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (qgroup) {
+ ret = -EEXIST;
+ goto out;
+ }
ret = add_qgroup_item(trans, quota_root, qgroupid);
+ if (ret)
+ goto out;
spin_lock(&fs_info->qgroup_lock);
qgroup = add_qgroup_rb(fs_info, qgroupid);
@@ -970,7 +1095,8 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
if (IS_ERR(qgroup))
ret = PTR_ERR(qgroup);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -981,27 +1107,32 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *qgroup;
int ret = 0;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
quota_root = fs_info->quota_root;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
- /* check if there are no relations to this qgroup */
- spin_lock(&fs_info->qgroup_lock);
qgroup = find_qgroup_rb(fs_info, qgroupid);
- if (qgroup) {
- if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
- spin_unlock(&fs_info->qgroup_lock);
- return -EBUSY;
+ if (!qgroup) {
+ ret = -ENOENT;
+ goto out;
+ } else {
+ /* check if there are no relations to this qgroup */
+ if (!list_empty(&qgroup->groups) ||
+ !list_empty(&qgroup->members)) {
+ ret = -EBUSY;
+ goto out;
}
}
- spin_unlock(&fs_info->qgroup_lock);
-
ret = del_qgroup_item(trans, quota_root, qgroupid);
spin_lock(&fs_info->qgroup_lock);
del_qgroup_rb(quota_root->fs_info, qgroupid);
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -1009,13 +1140,22 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit)
{
- struct btrfs_root *quota_root = fs_info->quota_root;
+ struct btrfs_root *quota_root;
struct btrfs_qgroup *qgroup;
int ret = 0;
- if (!quota_root)
- return -EINVAL;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
+ quota_root = fs_info->quota_root;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+ qgroup = find_qgroup_rb(fs_info, qgroupid);
+ if (!qgroup) {
+ ret = -ENOENT;
+ goto out;
+ }
ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
limit->flags, limit->max_rfer,
limit->max_excl, limit->rsv_rfer,
@@ -1027,31 +1167,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
}
spin_lock(&fs_info->qgroup_lock);
-
- qgroup = find_qgroup_rb(fs_info, qgroupid);
- if (!qgroup) {
- ret = -ENOENT;
- goto unlock;
- }
qgroup->lim_flags = limit->flags;
qgroup->max_rfer = limit->max_rfer;
qgroup->max_excl = limit->max_excl;
qgroup->rsv_rfer = limit->rsv_rfer;
qgroup->rsv_excl = limit->rsv_excl;
-
-unlock:
spin_unlock(&fs_info->qgroup_lock);
-
+out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
- struct btrfs_qgroup *qgroup)
-{
- if (list_empty(&qgroup->dirty))
- list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
/*
* btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
* the modification into a list that's later used by btrfs_end_transaction to
@@ -1075,6 +1201,144 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
return 0;
}
+static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+ struct btrfs_qgroup *qg;
+ int ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ /* XXX id not needed */
+ ret = ulist_add(tmp, qg->qgroupid,
+ (u64)(uintptr_t)qg, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+ if (qg->refcnt < seq)
+ qg->refcnt = seq + 1;
+ else
+ ++qg->refcnt;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (u64)(uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq, int sgn, u64 num_bytes,
+ struct btrfs_qgroup *qgroup)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+ int ret;
+
+ ulist_reinit(tmp);
+ ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
+ if (qg->refcnt < seq) {
+ /* not visited by step 1 */
+ qg->rfer += sgn * num_bytes;
+ qg->rfer_cmpr += sgn * num_bytes;
+ if (roots->nnodes == 0) {
+ qg->excl += sgn * num_bytes;
+ qg->excl_cmpr += sgn * num_bytes;
+ }
+ qgroup_dirty(fs_info, qg);
+ }
+ WARN_ON(qg->tag >= seq);
+ qg->tag = seq;
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
+ struct ulist *roots, struct ulist *tmp,
+ u64 seq, int sgn, u64 num_bytes)
+{
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct btrfs_qgroup *qg;
+ struct ulist_node *tmp_unode;
+ struct ulist_iterator tmp_uiter;
+ int ret;
+
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ulist_reinit(tmp);
+ ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+
+ ULIST_ITER_INIT(&tmp_uiter);
+ while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
+ if (qg->tag == seq)
+ continue;
+
+ if (qg->refcnt - seq == roots->nnodes) {
+ qg->excl -= sgn * num_bytes;
+ qg->excl_cmpr -= sgn * num_bytes;
+ qgroup_dirty(fs_info, qg);
+ }
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ }
+
+ return 0;
+}
+
/*
* btrfs_qgroup_account_ref is called for every ref that is added to or deleted
* from the fs. First, all roots referencing the extent are searched, and
@@ -1090,10 +1354,8 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *quota_root;
u64 ref_root;
struct btrfs_qgroup *qgroup;
- struct ulist_node *unode;
struct ulist *roots = NULL;
struct ulist *tmp = NULL;
- struct ulist_iterator uiter;
u64 seq;
int ret = 0;
int sgn;
@@ -1132,9 +1394,11 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
case BTRFS_ADD_DELAYED_REF:
case BTRFS_ADD_DELAYED_EXTENT:
sgn = 1;
+ seq = btrfs_tree_mod_seq_prev(node->seq);
break;
case BTRFS_DROP_DELAYED_REF:
sgn = -1;
+ seq = node->seq;
break;
case BTRFS_UPDATE_DELAYED_HEAD:
return 0;
@@ -1142,20 +1406,37 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
BUG();
}
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ return 0;
+ }
+ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
/*
* the delayed ref sequence number we pass depends on the direction of
- * the operation. for add operations, we pass (node->seq - 1) to skip
+ * the operation. for add operations, we pass
+ * tree_mod_log_prev_seq(node->seq) to skip
* the delayed ref's current sequence number, because we need the state
* of the tree before the add operation. for delete operations, we pass
* (node->seq) to include the delayed ref's current sequence number,
* because we need the state of the tree after the delete operation.
*/
- ret = btrfs_find_all_roots(trans, fs_info, node->bytenr,
- sgn > 0 ? node->seq - 1 : node->seq, &roots);
+ ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
if (ret < 0)
- goto out;
+ return ret;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+ if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+ ret = 0;
+ goto unlock;
+ }
+ }
+
quota_root = fs_info->quota_root;
if (!quota_root)
goto unlock;
@@ -1175,107 +1456,29 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
seq = fs_info->qgroup_seq;
fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- struct ulist_node *tmp_unode;
- struct ulist_iterator tmp_uiter;
- struct btrfs_qgroup *qg;
-
- qg = find_qgroup_rb(fs_info, unode->val);
- if (!qg)
- continue;
-
- ulist_reinit(tmp);
- /* XXX id not needed */
- ulist_add(tmp, qg->qgroupid, (u64)(uintptr_t)qg, GFP_ATOMIC);
- ULIST_ITER_INIT(&tmp_uiter);
- while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
- if (qg->refcnt < seq)
- qg->refcnt = seq + 1;
- else
- ++qg->refcnt;
-
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (u64)(uintptr_t)glist->group,
- GFP_ATOMIC);
- }
- }
- }
+ ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+ if (ret)
+ goto unlock;
/*
* step 2: walk from the new root
*/
- ulist_reinit(tmp);
- ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(tmp, &uiter))) {
- struct btrfs_qgroup *qg;
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
- if (qg->refcnt < seq) {
- /* not visited by step 1 */
- qg->rfer += sgn * node->num_bytes;
- qg->rfer_cmpr += sgn * node->num_bytes;
- if (roots->nnodes == 0) {
- qg->excl += sgn * node->num_bytes;
- qg->excl_cmpr += sgn * node->num_bytes;
- }
- qgroup_dirty(fs_info, qg);
- }
- WARN_ON(qg->tag >= seq);
- qg->tag = seq;
-
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
- }
- }
+ ret = qgroup_account_ref_step2(fs_info, roots, tmp, seq, sgn,
+ node->num_bytes, qgroup);
+ if (ret)
+ goto unlock;
/*
* step 3: walk again from old refs
*/
- ULIST_ITER_INIT(&uiter);
- while ((unode = ulist_next(roots, &uiter))) {
- struct btrfs_qgroup *qg;
- struct ulist_node *tmp_unode;
- struct ulist_iterator tmp_uiter;
-
- qg = find_qgroup_rb(fs_info, unode->val);
- if (!qg)
- continue;
-
- ulist_reinit(tmp);
- ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
- ULIST_ITER_INIT(&tmp_uiter);
- while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
- struct btrfs_qgroup_list *glist;
-
- qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
- if (qg->tag == seq)
- continue;
-
- if (qg->refcnt - seq == roots->nnodes) {
- qg->excl -= sgn * node->num_bytes;
- qg->excl_cmpr -= sgn * node->num_bytes;
- qgroup_dirty(fs_info, qg);
- }
+ ret = qgroup_account_ref_step3(fs_info, roots, tmp, seq, sgn,
+ node->num_bytes);
+ if (ret)
+ goto unlock;
- list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(tmp, glist->group->qgroupid,
- (uintptr_t)glist->group,
- GFP_ATOMIC);
- }
- }
- }
- ret = 0;
unlock:
spin_unlock(&fs_info->qgroup_lock);
-out:
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
ulist_free(roots);
ulist_free(tmp);
@@ -1290,10 +1493,14 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
{
struct btrfs_root *quota_root = fs_info->quota_root;
int ret = 0;
+ int start_rescan_worker = 0;
if (!quota_root)
goto out;
+ if (!fs_info->quota_enabled && fs_info->pending_quota_state)
+ start_rescan_worker = 1;
+
fs_info->quota_enabled = fs_info->pending_quota_state;
spin_lock(&fs_info->qgroup_lock);
@@ -1319,6 +1526,13 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ if (!ret && start_rescan_worker) {
+ ret = btrfs_qgroup_rescan(fs_info);
+ if (ret)
+ pr_err("btrfs: start rescan quota failed: %d\n", ret);
+ ret = 0;
+ }
+
out:
return ret;
@@ -1339,12 +1553,30 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_qgroup *srcgroup;
struct btrfs_qgroup *dstgroup;
u32 level_size = 0;
+ u64 nums;
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_enabled)
- return 0;
+ goto out;
- if (!quota_root)
- return -EINVAL;
+ if (!quota_root) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (inherit) {
+ i_qgroups = (u64 *)(inherit + 1);
+ nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+ 2 * inherit->num_excl_copies;
+ for (i = 0; i < nums; ++i) {
+ srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
+ if (!srcgroup) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ++i_qgroups;
+ }
+ }
/*
* create a tracking group for the subvol itself
@@ -1471,6 +1703,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
unlock:
spin_unlock(&fs_info->qgroup_lock);
out:
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
}
@@ -1515,7 +1748,10 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
ret = -ENOMEM;
goto out;
}
- ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ ret = ulist_add(ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
@@ -1524,23 +1760,27 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
- qg->reserved + qg->rfer + num_bytes >
- qg->max_rfer)
+ qg->reserved + (s64)qg->rfer + num_bytes >
+ qg->max_rfer) {
ret = -EDQUOT;
+ goto out;
+ }
if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
- qg->reserved + qg->excl + num_bytes >
- qg->max_excl)
+ qg->reserved + (s64)qg->excl + num_bytes >
+ qg->max_excl) {
ret = -EDQUOT;
+ goto out;
+ }
list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(ulist, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
+ ret = ulist_add(ulist, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
}
}
- if (ret)
- goto out;
-
+ ret = 0;
/*
* no limits exceeded, now record the reservation into all qgroups
*/
@@ -1569,6 +1809,7 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
struct ulist_node *unode;
struct ulist_iterator uiter;
u64 ref_root = root->root_key.objectid;
+ int ret = 0;
if (!is_fstree(ref_root))
return;
@@ -1591,7 +1832,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
btrfs_std_error(fs_info, -ENOMEM);
goto out;
}
- ulist_add(ulist, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
+ ret = ulist_add(ulist, qgroup->qgroupid,
+ (uintptr_t)qgroup, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
ULIST_ITER_INIT(&uiter);
while ((unode = ulist_next(ulist, &uiter))) {
struct btrfs_qgroup *qg;
@@ -1602,8 +1846,10 @@ void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
qg->reserved -= num_bytes;
list_for_each_entry(glist, &qg->groups, next_group) {
- ulist_add(ulist, glist->group->qgroupid,
- (uintptr_t)glist->group, GFP_ATOMIC);
+ ret = ulist_add(ulist, glist->group->qgroupid,
+ (uintptr_t)glist->group, GFP_ATOMIC);
+ if (ret < 0)
+ goto out;
}
}
@@ -1616,8 +1862,265 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
return;
- printk(KERN_ERR "btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %llu\n",
+ pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
- trans->delayed_ref_elem.seq);
+ (u32)(trans->delayed_ref_elem.seq >> 32),
+ (u32)trans->delayed_ref_elem.seq);
BUG();
}
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+ struct btrfs_trans_handle *trans, struct ulist *tmp,
+ struct extent_buffer *scratch_leaf)
+{
+ struct btrfs_key found;
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
+ struct ulist *roots = NULL;
+ struct ulist_node *unode;
+ struct ulist_iterator uiter;
+ struct seq_list tree_mod_seq_elem = {};
+ u64 seq;
+ int slot;
+ int ret;
+
+ path->leave_spinning = 1;
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ ret = btrfs_search_slot_for_read(fs_info->extent_root,
+ &fs_info->qgroup_rescan_progress,
+ path, 1, 0);
+
+ pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+ (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+ fs_info->qgroup_rescan_progress.type,
+ (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+ ret);
+
+ if (ret) {
+ /*
+ * The rescan is about to end, we will not be scanning any
+ * further blocks. We cannot unset the RESCAN flag here, because
+ * we want to commit the transaction if everything went well.
+ * To make the live accounting work in this phase, we set our
+ * scan progress pointer such that every real extent objectid
+ * will be smaller.
+ */
+ fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+ btrfs_release_path(path);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ return ret;
+ }
+
+ btrfs_item_key_to_cpu(path->nodes[0], &found,
+ btrfs_header_nritems(path->nodes[0]) - 1);
+ fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+ btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+ memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+ slot = path->slots[0];
+ btrfs_release_path(path);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+ btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+ if (found.type != BTRFS_EXTENT_ITEM_KEY)
+ continue;
+ ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+ tree_mod_seq_elem.seq, &roots);
+ if (ret < 0)
+ goto out;
+ spin_lock(&fs_info->qgroup_lock);
+ seq = fs_info->qgroup_seq;
+ fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+ ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+ if (ret) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+
+ /*
+ * step2 of btrfs_qgroup_account_ref works from a single root,
+ * we're doing all at once here.
+ */
+ ulist_reinit(tmp);
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(roots, &uiter))) {
+ struct btrfs_qgroup *qg;
+
+ qg = find_qgroup_rb(fs_info, unode->val);
+ if (!qg)
+ continue;
+
+ ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+ GFP_ATOMIC);
+ if (ret < 0) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+ }
+
+ /* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+ ULIST_ITER_INIT(&uiter);
+ while ((unode = ulist_next(tmp, &uiter))) {
+ struct btrfs_qgroup *qg;
+ struct btrfs_qgroup_list *glist;
+
+ qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+ qg->rfer += found.offset;
+ qg->rfer_cmpr += found.offset;
+ WARN_ON(qg->tag >= seq);
+ if (qg->refcnt - seq == roots->nnodes) {
+ qg->excl += found.offset;
+ qg->excl_cmpr += found.offset;
+ }
+ qgroup_dirty(fs_info, qg);
+
+ list_for_each_entry(glist, &qg->groups, next_group) {
+ ret = ulist_add(tmp, glist->group->qgroupid,
+ (uintptr_t)glist->group,
+ GFP_ATOMIC);
+ if (ret < 0) {
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ goto out;
+ }
+ }
+ }
+
+ spin_unlock(&fs_info->qgroup_lock);
+ ulist_free(roots);
+ ret = 0;
+ }
+
+out:
+ btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+ return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+ struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+ work);
+ struct btrfs_path *path;
+ struct btrfs_trans_handle *trans = NULL;
+ struct btrfs_fs_info *fs_info = qscan->fs_info;
+ struct ulist *tmp = NULL;
+ struct extent_buffer *scratch_leaf = NULL;
+ int err = -ENOMEM;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out;
+ tmp = ulist_alloc(GFP_NOFS);
+ if (!tmp)
+ goto out;
+ scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+ if (!scratch_leaf)
+ goto out;
+
+ err = 0;
+ while (!err) {
+ trans = btrfs_start_transaction(fs_info->fs_root, 0);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ break;
+ }
+ if (!fs_info->quota_enabled) {
+ err = -EINTR;
+ } else {
+ err = qgroup_rescan_leaf(qscan, path, trans,
+ tmp, scratch_leaf);
+ }
+ if (err > 0)
+ btrfs_commit_transaction(trans, fs_info->fs_root);
+ else
+ btrfs_end_transaction(trans, fs_info->fs_root);
+ }
+
+out:
+ kfree(scratch_leaf);
+ ulist_free(tmp);
+ btrfs_free_path(path);
+ kfree(qscan);
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+ if (err == 2 &&
+ fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ } else if (err < 0) {
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+ }
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ if (err >= 0) {
+ pr_info("btrfs: qgroup scan completed%s\n",
+ err == 2 ? " (inconsistency flag cleared)" : "");
+ } else {
+ pr_err("btrfs: qgroup scan failed with %d\n", err);
+ }
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+ memset(&qscan->work, 0, sizeof(qscan->work));
+ qscan->work.func = btrfs_qgroup_rescan_worker;
+ qscan->fs_info = fs_info;
+
+ pr_info("btrfs: qgroup scan started\n");
+ btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+ int ret = 0;
+ struct rb_node *n;
+ struct btrfs_qgroup *qgroup;
+ struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+ if (!qscan)
+ return -ENOMEM;
+
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ spin_lock(&fs_info->qgroup_lock);
+ if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+ ret = -EINPROGRESS;
+ else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+ ret = -EINVAL;
+ if (ret) {
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+ kfree(qscan);
+ return ret;
+ }
+
+ fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ memset(&fs_info->qgroup_rescan_progress, 0,
+ sizeof(fs_info->qgroup_rescan_progress));
+
+ /* clear all current qgroup tracking information */
+ for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+ qgroup = rb_entry(n, struct btrfs_qgroup, node);
+ qgroup->rfer = 0;
+ qgroup->rfer_cmpr = 0;
+ qgroup->excl = 0;
+ qgroup->excl_cmpr = 0;
+ }
+ spin_unlock(&fs_info->qgroup_lock);
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+ qgroup_rescan_start(fs_info, qscan);
+
+ return 0;
+}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 9a79fb790adb..0525e1389f5b 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -410,7 +410,7 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
/*
* remove everything in the cache
*/
-void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
+static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
struct btrfs_stripe_hash_table *table;
unsigned long flags;
@@ -1010,12 +1010,12 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
* this will try to merge into existing bios if possible, and returns
* zero if all went well.
*/
-int rbio_add_io_page(struct btrfs_raid_bio *rbio,
- struct bio_list *bio_list,
- struct page *page,
- int stripe_nr,
- unsigned long page_index,
- unsigned long bio_max_len)
+static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
+ struct bio_list *bio_list,
+ struct page *page,
+ int stripe_nr,
+ unsigned long page_index,
+ unsigned long bio_max_len)
{
struct bio *last = bio_list->tail;
u64 last_end = 0;
@@ -1050,7 +1050,7 @@ int rbio_add_io_page(struct btrfs_raid_bio *rbio,
}
/* put a new bio on the list */
- bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
if (!bio)
return -ENOMEM;
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 96b93daa0bbb..1031b69252c5 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -955,10 +955,11 @@ int btrfs_reada_wait(void *handle)
while (atomic_read(&rc->elems)) {
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
5 * HZ);
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+ dump_devs(rc->root->fs_info,
+ atomic_read(&rc->elems) < 10 ? 1 : 0);
}
- dump_devs(rc->root->fs_info, rc->elems < 10 ? 1 : 0);
+ dump_devs(rc->root->fs_info, atomic_read(&rc->elems) < 10 ? 1 : 0);
kref_put(&rc->refcnt, reada_control_release);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 50695dc5e2ab..395b82031a42 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -326,8 +326,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
return NULL;
}
-void backref_tree_panic(struct rb_node *rb_node, int errno,
- u64 bytenr)
+static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
{
struct btrfs_fs_info *fs_info = NULL;
@@ -619,10 +618,13 @@ static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
unsigned long *ptr, unsigned long *end)
{
+ struct btrfs_key key;
struct btrfs_extent_item *ei;
struct btrfs_tree_block_info *bi;
u32 item_size;
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
item_size = btrfs_item_size_nr(leaf, slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (item_size < sizeof(*ei)) {
@@ -634,13 +636,18 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
WARN_ON(!(btrfs_extent_flags(leaf, ei) &
BTRFS_EXTENT_FLAG_TREE_BLOCK));
- if (item_size <= sizeof(*ei) + sizeof(*bi)) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY &&
+ item_size <= sizeof(*ei) + sizeof(*bi)) {
WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
return 1;
}
- bi = (struct btrfs_tree_block_info *)(ei + 1);
- *ptr = (unsigned long)(bi + 1);
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ bi = (struct btrfs_tree_block_info *)(ei + 1);
+ *ptr = (unsigned long)(bi + 1);
+ } else {
+ *ptr = (unsigned long)(ei + 1);
+ }
*end = (unsigned long)ei + item_size;
return 0;
}
@@ -708,7 +715,7 @@ again:
end = 0;
ptr = 0;
key.objectid = cur->bytenr;
- key.type = BTRFS_EXTENT_ITEM_KEY;
+ key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = (u64)-1;
path1->search_commit_root = 1;
@@ -766,7 +773,8 @@ again:
break;
}
- if (key.type == BTRFS_EXTENT_ITEM_KEY) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+ key.type == BTRFS_METADATA_ITEM_KEY) {
ret = find_inline_backref(eb, path1->slots[0],
&ptr, &end);
if (ret)
@@ -1269,6 +1277,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
}
spin_unlock(&rc->reloc_root_tree.lock);
+ if (!node)
+ return 0;
BUG_ON((struct btrfs_root *)node->data != root);
if (!del) {
@@ -1760,7 +1770,11 @@ again:
eb = read_tree_block(dest, old_bytenr, blocksize,
old_ptr_gen);
- BUG_ON(!eb);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ ret = (!eb) ? -ENOMEM : -EIO;
+ free_extent_buffer(eb);
+ break;
+ }
btrfs_tree_lock(eb);
if (cow) {
ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -1913,6 +1927,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
bytenr = btrfs_node_blockptr(eb, path->slots[i]);
blocksize = btrfs_level_size(root, i - 1);
eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ return -EIO;
+ }
BUG_ON(btrfs_header_level(eb) != i - 1);
path->nodes[i - 1] = eb;
path->slots[i - 1] = 0;
@@ -2238,13 +2256,28 @@ again:
}
static noinline_for_stack
+void free_reloc_roots(struct list_head *list)
+{
+ struct btrfs_root *reloc_root;
+
+ while (!list_empty(list)) {
+ reloc_root = list_entry(list->next, struct btrfs_root,
+ root_list);
+ __update_reloc_root(reloc_root, 1);
+ free_extent_buffer(reloc_root->node);
+ free_extent_buffer(reloc_root->commit_root);
+ kfree(reloc_root);
+ }
+}
+
+static noinline_for_stack
int merge_reloc_roots(struct reloc_control *rc)
{
struct btrfs_root *root;
struct btrfs_root *reloc_root;
LIST_HEAD(reloc_roots);
int found = 0;
- int ret;
+ int ret = 0;
again:
root = rc->extent_root;
@@ -2270,20 +2303,33 @@ again:
BUG_ON(root->reloc_root != reloc_root);
ret = merge_reloc_root(rc, root);
- BUG_ON(ret);
+ if (ret)
+ goto out;
} else {
list_del_init(&reloc_root->root_list);
}
ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
- BUG_ON(ret < 0);
+ if (ret < 0) {
+ if (list_empty(&reloc_root->root_list))
+ list_add_tail(&reloc_root->root_list,
+ &reloc_roots);
+ goto out;
+ }
}
if (found) {
found = 0;
goto again;
}
+out:
+ if (ret) {
+ btrfs_std_error(root->fs_info, ret);
+ if (!list_empty(&reloc_roots))
+ free_reloc_roots(&reloc_roots);
+ }
+
BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
- return 0;
+ return ret;
}
static void free_block_list(struct rb_root *blocks)
@@ -2562,7 +2608,8 @@ static int do_relocation(struct btrfs_trans_handle *trans,
blocksize = btrfs_level_size(root, node->level);
generation = btrfs_node_ptr_generation(upper->eb, slot);
eb = read_tree_block(root, bytenr, blocksize, generation);
- if (!eb) {
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
err = -EIO;
goto next;
}
@@ -2723,7 +2770,10 @@ static int get_tree_block_key(struct reloc_control *rc,
BUG_ON(block->key_ready);
eb = read_tree_block(rc->extent_root, block->bytenr,
block->key.objectid, block->key.offset);
- BUG_ON(!eb);
+ if (!eb || !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ return -EIO;
+ }
WARN_ON(btrfs_header_level(eb) != block->level);
if (block->level == 0)
btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -2738,8 +2788,13 @@ static int reada_tree_block(struct reloc_control *rc,
struct tree_block *block)
{
BUG_ON(block->key_ready);
- readahead_tree_block(rc->extent_root, block->bytenr,
- block->key.objectid, block->key.offset);
+ if (block->key.type == BTRFS_METADATA_ITEM_KEY)
+ readahead_tree_block(rc->extent_root, block->bytenr,
+ block->key.objectid,
+ rc->extent_root->leafsize);
+ else
+ readahead_tree_block(rc->extent_root, block->bytenr,
+ block->key.objectid, block->key.offset);
return 0;
}
@@ -2818,8 +2873,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
int err = 0;
path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ err = -ENOMEM;
+ goto out_free_blocks;
+ }
rb_node = rb_first(blocks);
while (rb_node) {
@@ -2832,8 +2889,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
rb_node = rb_first(blocks);
while (rb_node) {
block = rb_entry(rb_node, struct tree_block, rb_node);
- if (!block->key_ready)
- get_tree_block_key(rc, block);
+ if (!block->key_ready) {
+ err = get_tree_block_key(rc, block);
+ if (err)
+ goto out_free_path;
+ }
rb_node = rb_next(rb_node);
}
@@ -2858,10 +2918,12 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,
rb_node = rb_next(rb_node);
}
out:
- free_block_list(blocks);
err = finish_pending_nodes(trans, rc, path, err);
+out_free_path:
btrfs_free_path(path);
+out_free_blocks:
+ free_block_list(blocks);
return err;
}
@@ -2932,7 +2994,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
while (1) {
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -3143,12 +3205,17 @@ static int add_tree_block(struct reloc_control *rc,
eb = path->nodes[0];
item_size = btrfs_item_size_nr(eb, path->slots[0]);
- if (item_size >= sizeof(*ei) + sizeof(*bi)) {
+ if (extent_key->type == BTRFS_METADATA_ITEM_KEY ||
+ item_size >= sizeof(*ei) + sizeof(*bi)) {
ei = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_extent_item);
- bi = (struct btrfs_tree_block_info *)(ei + 1);
+ if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) {
+ bi = (struct btrfs_tree_block_info *)(ei + 1);
+ level = btrfs_tree_block_level(eb, bi);
+ } else {
+ level = (int)extent_key->offset;
+ }
generation = btrfs_extent_generation(eb, ei);
- level = btrfs_tree_block_level(eb, bi);
} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
u64 ref_owner;
@@ -3177,7 +3244,7 @@ static int add_tree_block(struct reloc_control *rc,
return -ENOMEM;
block->bytenr = extent_key->objectid;
- block->key.objectid = extent_key->offset;
+ block->key.objectid = rc->extent_root->leafsize;
block->key.offset = generation;
block->level = level;
block->key_ready = 0;
@@ -3219,9 +3286,15 @@ static int __add_tree_block(struct reloc_control *rc,
ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
- BUG_ON(ret);
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (ret > 0) {
+ if (key.objectid == bytenr &&
+ key.type == BTRFS_METADATA_ITEM_KEY)
+ ret = 0;
+ }
+ BUG_ON(ret);
+
ret = add_tree_block(rc, &key, path, blocks);
out:
btrfs_free_path(path);
@@ -3242,7 +3315,8 @@ static int block_use_full_backref(struct reloc_control *rc,
return 1;
ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
- eb->start, eb->len, NULL, &flags);
+ eb->start, btrfs_header_level(eb), 1,
+ NULL, &flags);
BUG_ON(ret);
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
@@ -3276,6 +3350,11 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
}
truncate:
+ ret = btrfs_check_trunc_cache_free_space(root,
+ &fs_info->global_block_rsv);
+ if (ret)
+ goto out;
+
path = btrfs_alloc_path();
if (!path) {
ret = -ENOMEM;
@@ -3611,12 +3690,25 @@ next:
break;
}
- if (key.type != BTRFS_EXTENT_ITEM_KEY ||
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY) {
+ path->slots[0]++;
+ goto next;
+ }
+
+ if (key.type == BTRFS_EXTENT_ITEM_KEY &&
key.objectid + key.offset <= rc->search_start) {
path->slots[0]++;
goto next;
}
+ if (key.type == BTRFS_METADATA_ITEM_KEY &&
+ key.objectid + rc->extent_root->leafsize <=
+ rc->search_start) {
+ path->slots[0]++;
+ goto next;
+ }
+
ret = find_first_extent_bit(&rc->processed_blocks,
key.objectid, &start, &end,
EXTENT_DIRTY, NULL);
@@ -3625,7 +3717,11 @@ next:
btrfs_release_path(path);
rc->search_start = end + 1;
} else {
- rc->search_start = key.objectid + key.offset;
+ if (key.type == BTRFS_EXTENT_ITEM_KEY)
+ rc->search_start = key.objectid + key.offset;
+ else
+ rc->search_start = key.objectid +
+ rc->extent_root->leafsize;
memcpy(extent_key, &key, sizeof(key));
return 0;
}
@@ -3698,7 +3794,15 @@ int prepare_to_relocate(struct reloc_control *rc)
set_reloc_control(rc);
trans = btrfs_join_transaction(rc->extent_root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ unset_reloc_control(rc);
+ /*
+ * extent tree is not a ref_cow tree and has no reloc_root to
+ * cleanup. And callers are responsible to free the above
+ * block rsv.
+ */
+ return PTR_ERR(trans);
+ }
btrfs_commit_transaction(trans, rc->extent_root);
return 0;
}
@@ -3730,7 +3834,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
while (1) {
progress++;
trans = btrfs_start_transaction(rc->extent_root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ trans = NULL;
+ break;
+ }
restart:
if (update_backref_cache(trans, &rc->backref_cache)) {
btrfs_end_transaction(trans, rc->extent_root);
@@ -4060,10 +4168,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
while (1) {
mutex_lock(&fs_info->cleaner_mutex);
-
- btrfs_clean_old_snapshots(fs_info->tree_root);
ret = relocate_block_group(rc);
-
mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) {
err = ret;
@@ -4264,14 +4369,9 @@ int btrfs_recover_relocation(struct btrfs_root *root)
out_free:
kfree(rc);
out:
- while (!list_empty(&reloc_roots)) {
- reloc_root = list_entry(reloc_roots.next,
- struct btrfs_root, root_list);
- list_del(&reloc_root->root_list);
- free_extent_buffer(reloc_root->node);
- free_extent_buffer(reloc_root->commit_root);
- kfree(reloc_root);
- }
+ if (!list_empty(&reloc_roots))
+ free_reloc_roots(&reloc_roots);
+
btrfs_free_path(path);
if (err == 0) {
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 668af537a3ea..5bf1ed57f178 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -29,9 +29,8 @@
* generation numbers as then we know the root was once mounted with an older
* kernel that was not aware of the root item structure change.
*/
-void btrfs_read_root_item(struct btrfs_root *root,
- struct extent_buffer *eb, int slot,
- struct btrfs_root_item *item)
+void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+ struct btrfs_root_item *item)
{
uuid_le uuid;
int len;
@@ -104,7 +103,7 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
goto out;
}
if (item)
- btrfs_read_root_item(root, l, slot, item);
+ btrfs_read_root_item(l, slot, item);
if (key)
memcpy(key, &found_key, sizeof(found_key));
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 53c3501fa4ca..79bd479317cb 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -542,7 +542,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
eb = path->nodes[0];
ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size_nr(eb, path->slots[0]);
- btrfs_release_path(path);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
@@ -558,7 +557,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
ret < 0 ? -1 : ref_level,
ret < 0 ? -1 : ref_root);
} while (ret != 1);
+ btrfs_release_path(path);
} else {
+ btrfs_release_path(path);
swarn.path = path;
swarn.dev = dev;
iterate_extent_inodes(fs_info, found_key.objectid,
@@ -1295,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}
WARN_ON(!page->page);
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
@@ -1335,7 +1336,6 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
int page_num;
u8 calculated_csum[BTRFS_CSUM_SIZE];
u32 crc = ~(u32)0;
- struct btrfs_root *root = fs_info->extent_root;
void *mapped_buffer;
WARN_ON(!sblock->pagev[0]->page);
@@ -1364,12 +1364,11 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
for (page_num = 0;;) {
if (page_num == 0 && is_metadata)
- crc = btrfs_csum_data(root,
+ crc = btrfs_csum_data(
((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE,
crc, PAGE_SIZE - BTRFS_CSUM_SIZE);
else
- crc = btrfs_csum_data(root, mapped_buffer, crc,
- PAGE_SIZE);
+ crc = btrfs_csum_data(mapped_buffer, crc, PAGE_SIZE);
kunmap_atomic(mapped_buffer);
page_num++;
@@ -1432,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
@@ -1523,7 +1522,7 @@ again:
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -1656,7 +1655,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
void *buffer;
u32 crc = ~(u32)0;
int fail = 0;
- struct btrfs_root *root = sctx->dev_root;
u64 len;
int index;
@@ -1673,7 +1671,7 @@ static int scrub_checksum_data(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, PAGE_SIZE);
- crc = btrfs_csum_data(root, buffer, crc, l);
+ crc = btrfs_csum_data(buffer, crc, l);
kunmap_atomic(buffer);
len -= l;
if (len == 0)
@@ -1743,7 +1741,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, mapped_size);
- crc = btrfs_csum_data(root, p, crc, l);
+ crc = btrfs_csum_data(p, crc, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
@@ -1804,7 +1802,7 @@ static int scrub_checksum_super(struct scrub_block *sblock)
for (;;) {
u64 l = min_t(u64, len, mapped_size);
- crc = btrfs_csum_data(root, p, crc, l);
+ crc = btrfs_csum_data(p, crc, l);
kunmap_atomic(mapped_buffer);
len -= l;
if (len == 0)
@@ -1932,7 +1930,7 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
@@ -2235,12 +2233,12 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 flags;
int ret;
int slot;
- int i;
u64 nstripes;
struct extent_buffer *l;
struct btrfs_key key;
u64 physical;
u64 logical;
+ u64 logic_end;
u64 generation;
int mirror_num;
struct reada_control *reada1;
@@ -2254,6 +2252,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 extent_len;
struct btrfs_device *extent_dev;
int extent_mirror_num;
+ int stop_loop;
if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
BTRFS_BLOCK_GROUP_RAID6)) {
@@ -2314,8 +2313,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
key_start.type = BTRFS_EXTENT_ITEM_KEY;
key_start.offset = (u64)0;
key_end.objectid = base + offset + nstripes * increment;
- key_end.type = BTRFS_EXTENT_ITEM_KEY;
- key_end.offset = (u64)0;
+ key_end.type = BTRFS_METADATA_ITEM_KEY;
+ key_end.offset = (u64)-1;
reada1 = btrfs_reada_add(root, &key_start, &key_end);
key_start.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -2353,8 +2352,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
*/
logical = base + offset;
physical = map->stripes[num].physical;
+ logic_end = logical + increment * nstripes;
ret = 0;
- for (i = 0; i < nstripes; ++i) {
+ while (logical < logic_end) {
/*
* canceled?
*/
@@ -2390,19 +2390,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
wake_up(&fs_info->scrub_pause_wait);
}
- ret = btrfs_lookup_csums_range(csum_root, logical,
- logical + map->stripe_len - 1,
- &sctx->csum_list, 1);
- if (ret)
- goto out;
-
key.objectid = logical;
key.type = BTRFS_EXTENT_ITEM_KEY;
- key.offset = (u64)0;
+ key.offset = (u64)-1;
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
+
if (ret > 0) {
ret = btrfs_previous_item(root, path, 0,
BTRFS_EXTENT_ITEM_KEY);
@@ -2419,7 +2414,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
}
}
+ stop_loop = 0;
while (1) {
+ u64 bytes;
+
l = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(l)) {
@@ -2429,19 +2427,30 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
if (ret < 0)
goto out;
+ stop_loop = 1;
break;
}
btrfs_item_key_to_cpu(l, &key, slot);
- if (key.objectid + key.offset <= logical)
- goto next;
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ bytes = root->leafsize;
+ else
+ bytes = key.offset;
- if (key.objectid >= logical + map->stripe_len)
- break;
+ if (key.objectid + bytes <= logical)
+ goto next;
- if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
+ if (key.type != BTRFS_EXTENT_ITEM_KEY &&
+ key.type != BTRFS_METADATA_ITEM_KEY)
goto next;
+ if (key.objectid >= logical + map->stripe_len) {
+ /* out of this device extent */
+ if (key.objectid >= logic_end)
+ stop_loop = 1;
+ break;
+ }
+
extent = btrfs_item_ptr(l, slot,
struct btrfs_extent_item);
flags = btrfs_extent_flags(l, extent);
@@ -2457,22 +2466,24 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
goto next;
}
+again:
+ extent_logical = key.objectid;
+ extent_len = bytes;
+
/*
* trim extent to this stripe
*/
- if (key.objectid < logical) {
- key.offset -= logical - key.objectid;
- key.objectid = logical;
+ if (extent_logical < logical) {
+ extent_len -= logical - extent_logical;
+ extent_logical = logical;
}
- if (key.objectid + key.offset >
+ if (extent_logical + extent_len >
logical + map->stripe_len) {
- key.offset = logical + map->stripe_len -
- key.objectid;
+ extent_len = logical + map->stripe_len -
+ extent_logical;
}
- extent_logical = key.objectid;
- extent_physical = key.objectid - logical + physical;
- extent_len = key.offset;
+ extent_physical = extent_logical - logical + physical;
extent_dev = scrub_dev;
extent_mirror_num = mirror_num;
if (is_dev_replace)
@@ -2480,13 +2491,35 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
extent_len, &extent_physical,
&extent_dev,
&extent_mirror_num);
+
+ ret = btrfs_lookup_csums_range(csum_root, logical,
+ logical + map->stripe_len - 1,
+ &sctx->csum_list, 1);
+ if (ret)
+ goto out;
+
ret = scrub_extent(sctx, extent_logical, extent_len,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
- key.objectid - logical + physical);
+ extent_physical);
if (ret)
goto out;
+ if (extent_logical + extent_len <
+ key.objectid + bytes) {
+ logical += increment;
+ physical += map->stripe_len;
+
+ if (logical < key.objectid + bytes) {
+ cond_resched();
+ goto again;
+ }
+
+ if (logical >= logic_end) {
+ stop_loop = 1;
+ break;
+ }
+ }
next:
path->slots[0]++;
}
@@ -2494,8 +2527,14 @@ next:
logical += increment;
physical += map->stripe_len;
spin_lock(&sctx->stat_lock);
- sctx->stat.last_physical = physical;
+ if (stop_loop)
+ sctx->stat.last_physical = map->stripes[num].physical +
+ length;
+ else
+ sctx->stat.last_physical = physical;
spin_unlock(&sctx->stat_lock);
+ if (stop_loop)
+ break;
}
out:
/* push queued extents */
@@ -3004,28 +3043,6 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info,
return 0;
}
-int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid)
-{
- struct btrfs_fs_info *fs_info = root->fs_info;
- struct btrfs_device *dev;
- int ret;
-
- /*
- * we have to hold the device_list_mutex here so the device
- * does not go away in cancel_dev. FIXME: find a better solution
- */
- mutex_lock(&fs_info->fs_devices->device_list_mutex);
- dev = btrfs_find_device(fs_info, devid, NULL, NULL);
- if (!dev) {
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
- return -ENODEV;
- }
- ret = btrfs_scrub_cancel_dev(fs_info, dev);
- mutex_unlock(&fs_info->fs_devices->device_list_mutex);
-
- return ret;
-}
-
int btrfs_scrub_progress(struct btrfs_root *root, u64 devid,
struct btrfs_scrub_progress *progress)
{
@@ -3290,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
return -EIO;
}
- bio = bio_alloc(GFP_NOFS, 1);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f7a8b861058b..ff40f1c00ce3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -387,7 +387,7 @@ static struct btrfs_path *alloc_path_for_send(void)
return path;
}
-int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
+static int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off)
{
int ret;
mm_segment_t old_fs;
@@ -3479,7 +3479,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
struct send_ctx *sctx = ctx;
char *found_data = NULL;
int found_data_len = 0;
- struct fs_path *p = NULL;
ret = find_xattr(sctx, sctx->parent_root, sctx->right_path,
sctx->cmp_key, name, name_len, &found_data,
@@ -3498,7 +3497,6 @@ static int __process_changed_new_xattr(int num, struct btrfs_key *di_key,
}
kfree(found_data);
- fs_path_free(sctx, p);
return ret;
}
@@ -3945,12 +3943,10 @@ static int is_extent_unchanged(struct send_ctx *sctx,
found_key.type != key.type) {
key.offset += right_len;
break;
- } else {
- if (found_key.offset != key.offset + right_len) {
- /* Should really not happen */
- ret = -EIO;
- goto out;
- }
+ }
+ if (found_key.offset != key.offset + right_len) {
+ ret = 0;
+ goto out;
}
key = found_key;
}
@@ -4531,9 +4527,11 @@ static int send_subvol(struct send_ctx *sctx)
{
int ret;
- ret = send_header(sctx);
- if (ret < 0)
- goto out;
+ if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
+ ret = send_header(sctx);
+ if (ret < 0)
+ goto out;
+ }
ret = send_subvol_begin(sctx);
if (ret < 0)
@@ -4595,7 +4593,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
}
- if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) {
+ if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
ret = -EINVAL;
goto out;
}
@@ -4614,8 +4612,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
sctx->flags = arg->flags;
sctx->send_filp = fget(arg->send_fd);
- if (IS_ERR(sctx->send_filp)) {
- ret = PTR_ERR(sctx->send_filp);
+ if (!sctx->send_filp) {
+ ret = -EBADF;
goto out;
}
@@ -4706,12 +4704,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
if (ret < 0)
goto out;
- ret = begin_cmd(sctx, BTRFS_SEND_C_END);
- if (ret < 0)
- goto out;
- ret = send_cmd(sctx);
- if (ret < 0)
- goto out;
+ if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
+ ret = begin_cmd(sctx, BTRFS_SEND_C_END);
+ if (ret < 0)
+ goto out;
+ ret = send_cmd(sctx);
+ if (ret < 0)
+ goto out;
+ }
out:
kfree(arg);
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 8bb18f7ccaa6..48d425aef05b 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -131,5 +131,4 @@ enum {
#ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
-int write_buf(struct file *filp, const void *buf, u32 len, loff_t *off);
#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 68a29a1ea068..f0857e092a3c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,7 @@
#include "compression.h"
#include "rcu-string.h"
#include "dev-replace.h"
+#include "free-space-cache.h"
#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
@@ -63,9 +64,9 @@
static const struct super_operations btrfs_super_ops;
static struct file_system_type btrfs_fs_type;
-static const char *btrfs_decode_error(int errno, char nbuf[16])
+static const char *btrfs_decode_error(int errno)
{
- char *errstr = NULL;
+ char *errstr = "unknown";
switch (errno) {
case -EIO:
@@ -80,18 +81,18 @@ static const char *btrfs_decode_error(int errno, char nbuf[16])
case -EEXIST:
errstr = "Object already exists";
break;
- default:
- if (nbuf) {
- if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
- errstr = nbuf;
- }
+ case -ENOSPC:
+ errstr = "No space left";
+ break;
+ case -ENOENT:
+ errstr = "No such entry";
break;
}
return errstr;
}
-static void __save_error_info(struct btrfs_fs_info *fs_info)
+static void save_error_info(struct btrfs_fs_info *fs_info)
{
/*
* today we only save the error info into ram. Long term we'll
@@ -100,11 +101,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
}
-static void save_error_info(struct btrfs_fs_info *fs_info)
-{
- __save_error_info(fs_info);
-}
-
/* btrfs handle error by forcing the filesystem readonly */
static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
{
@@ -115,7 +111,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
sb->s_flags |= MS_RDONLY;
- printk(KERN_INFO "btrfs is forced readonly\n");
+ btrfs_info(fs_info, "forced readonly");
/*
* Note that a running device replace operation is not
* canceled here although there is no way to update
@@ -126,7 +122,6 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
* mounted writeable again, the device replace
* operation continues.
*/
-// WARN_ON(1);
}
}
@@ -139,7 +134,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
struct super_block *sb = fs_info->sb;
- char nbuf[16];
const char *errstr;
/*
@@ -149,7 +143,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
return;
- errstr = btrfs_decode_error(errno, nbuf);
+ errstr = btrfs_decode_error(errno);
if (fmt) {
struct va_format vaf;
va_list args;
@@ -158,19 +152,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
vaf.fmt = fmt;
vaf.va = &args;
- printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
- sb->s_id, function, line, errstr, &vaf);
+ printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s (%pV)\n",
+ sb->s_id, function, line, errno, errstr, &vaf);
va_end(args);
} else {
- printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
- sb->s_id, function, line, errstr);
+ printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: errno=%d %s\n",
+ sb->s_id, function, line, errno, errstr);
}
/* Don't go through full error handling during mount */
- if (sb->s_flags & MS_BORN) {
- save_error_info(fs_info);
+ save_error_info(fs_info);
+ if (sb->s_flags & MS_BORN)
btrfs_handle_error(fs_info);
- }
}
static const char * const logtypes[] = {
@@ -184,7 +177,7 @@ static const char * const logtypes[] = {
"debug",
};
-void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
struct super_block *sb = fs_info->sb;
char lvl[4];
@@ -208,7 +201,7 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
+ printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf);
va_end(args);
}
@@ -252,18 +245,24 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno)
{
- WARN_ONCE(1, KERN_DEBUG "btrfs: Transaction aborted\n");
+ /*
+ * Report first abort since mount
+ */
+ if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
+ &root->fs_info->fs_state)) {
+ WARN(1, KERN_DEBUG "btrfs: Transaction aborted (error %d)\n",
+ errno);
+ }
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
if (!trans->blocks_used) {
- char nbuf[16];
const char *errstr;
- errstr = btrfs_decode_error(errno, nbuf);
- btrfs_printk(root->fs_info,
- "%s:%d: Aborting unused transaction(%s).\n",
- function, line, errstr);
+ errstr = btrfs_decode_error(errno);
+ btrfs_warn(root->fs_info,
+ "%s:%d: Aborting unused transaction(%s).",
+ function, line, errstr);
return;
}
ACCESS_ONCE(trans->transaction->aborted) = errno;
@@ -276,7 +275,6 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...)
{
- char nbuf[16];
char *s_id = "<unknown>";
const char *errstr;
struct va_format vaf = { .fmt = fmt };
@@ -288,13 +286,13 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
va_start(args, fmt);
vaf.va = &args;
- errstr = btrfs_decode_error(errno, nbuf);
+ errstr = btrfs_decode_error(errno);
if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
- panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
- s_id, function, line, &vaf, errstr);
+ panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
+ s_id, function, line, &vaf, errno, errstr);
- printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
- s_id, function, line, &vaf, errstr);
+ printk(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n",
+ s_id, function, line, &vaf, errno, errstr);
va_end(args);
/* Caller calls BUG() */
}
@@ -650,7 +648,7 @@ out:
*/
static int btrfs_parse_early_options(const char *options, fmode_t flags,
void *holder, char **subvol_name, u64 *subvol_objectid,
- u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
+ struct btrfs_fs_devices **fs_devices)
{
substring_t args[MAX_OPT_ARGS];
char *device_name, *opts, *orig, *p;
@@ -693,16 +691,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
}
break;
case Opt_subvolrootid:
- intarg = 0;
- error = match_int(&args[0], &intarg);
- if (!error) {
- /* we want the original fs_tree */
- if (!intarg)
- *subvol_rootid =
- BTRFS_FS_TREE_OBJECTID;
- else
- *subvol_rootid = intarg;
- }
+ printk(KERN_WARNING
+ "btrfs: 'subvolrootid' mount option is deprecated and has no effect\n");
break;
case Opt_device:
device_name = match_strdup(&args[0]);
@@ -876,7 +866,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_wait_ordered_extents(root, 1);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
@@ -1080,7 +1070,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
fmode_t mode = FMODE_READ;
char *subvol_name = NULL;
u64 subvol_objectid = 0;
- u64 subvol_rootid = 0;
int error = 0;
if (!(flags & MS_RDONLY))
@@ -1088,7 +1077,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
error = btrfs_parse_early_options(data, mode, fs_type,
&subvol_name, &subvol_objectid,
- &subvol_rootid, &fs_devices);
+ &fs_devices);
if (error) {
kfree(subvol_name);
return ERR_PTR(error);
@@ -1202,11 +1191,14 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
new_pool_size);
}
-static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info,
- unsigned long old_opts, int flags)
+static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info)
{
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+}
+static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info,
+ unsigned long old_opts, int flags)
+{
if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) &&
(!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) ||
(flags & MS_RDONLY))) {
@@ -1247,7 +1239,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
unsigned int old_metadata_ratio = fs_info->metadata_ratio;
int ret;
- btrfs_remount_prepare(fs_info, old_opts, *flags);
+ btrfs_remount_prepare(fs_info);
ret = btrfs_parse_options(root, data);
if (ret) {
@@ -1255,6 +1247,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
+ btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
@@ -1270,6 +1263,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
+ btrfs_pause_balance(fs_info);
ret = btrfs_commit_super(root);
if (ret)
@@ -1558,6 +1552,7 @@ static struct file_system_type btrfs_fs_type = {
.kill_sb = btrfs_kill_super,
.fs_flags = FS_REQUIRES_DEV,
};
+MODULE_ALIAS_FS("btrfs");
/*
* used by btrfsctl to scan devices when no FS is mounted
@@ -1738,6 +1733,10 @@ static int __init init_btrfs_fs(void)
btrfs_init_lockdep();
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ btrfs_test_free_space_cache();
+#endif
+
printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index e52da6fb1165..0544587d74f4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -34,7 +34,7 @@
#define BTRFS_ROOT_TRANS_TAG 0
-void put_transaction(struct btrfs_transaction *transaction)
+static void put_transaction(struct btrfs_transaction *transaction)
{
WARN_ON(atomic_read(&transaction->use_count) == 0);
if (atomic_dec_and_test(&transaction->use_count)) {
@@ -162,7 +162,7 @@ loop:
if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log))
WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
"creating a fresh transaction\n");
- atomic_set(&fs_info->tree_mod_seq, 0);
+ atomic64_set(&fs_info->tree_mod_seq, 0);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
@@ -625,14 +625,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
- /*
- * the same root has to be passed to start_transaction and
- * end_transaction. Subvolume quota depends on this.
- */
- WARN_ON(trans->root != root);
if (trans->qgroup_reserved) {
- btrfs_qgroup_free(root, trans->qgroup_reserved);
+ /*
+ * the same root has to be passed here between start_transaction
+ * and end_transaction. Subvolume quota depends on this.
+ */
+ btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
@@ -708,23 +707,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
int btrfs_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- int ret;
-
- ret = __btrfs_end_transaction(trans, root, 0);
- if (ret)
- return ret;
- return 0;
+ return __btrfs_end_transaction(trans, root, 0);
}
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- int ret;
-
- ret = __btrfs_end_transaction(trans, root, 1);
- if (ret)
- return ret;
- return 0;
+ return __btrfs_end_transaction(trans, root, 1);
}
int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
@@ -949,7 +938,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
int btrfs_add_dead_root(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- list_add(&root->root_list, &root->fs_info->dead_roots);
+ list_add_tail(&root->root_list, &root->fs_info->dead_roots);
spin_unlock(&root->fs_info->trans_lock);
return 0;
}
@@ -1052,7 +1041,12 @@ int btrfs_defrag_root(struct btrfs_root *root)
/*
* new snapshots need to be created at a very specific time in the
- * transaction commit. This does the actual creation
+ * transaction commit. This does the actual creation.
+ *
+ * Note:
+ * If the error which may affect the commitment of the current transaction
+ * happens, we should return the error number. If the error which just affect
+ * the creation of the pending snapshots, just return 0.
*/
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
@@ -1071,7 +1065,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec cur_time = CURRENT_TIME;
- int ret;
+ int ret = 0;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
@@ -1080,40 +1074,36 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
if (!path) {
- ret = pending->error = -ENOMEM;
- return ret;
+ pending->error = -ENOMEM;
+ return 0;
}
new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
if (!new_root_item) {
- ret = pending->error = -ENOMEM;
+ pending->error = -ENOMEM;
goto root_item_alloc_fail;
}
- ret = btrfs_find_free_objectid(tree_root, &objectid);
- if (ret) {
- pending->error = ret;
+ pending->error = btrfs_find_free_objectid(tree_root, &objectid);
+ if (pending->error)
goto no_free_objectid;
- }
btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
if (to_reserve > 0) {
- ret = btrfs_block_rsv_add(root, &pending->block_rsv,
- to_reserve,
- BTRFS_RESERVE_NO_FLUSH);
- if (ret) {
- pending->error = ret;
+ pending->error = btrfs_block_rsv_add(root,
+ &pending->block_rsv,
+ to_reserve,
+ BTRFS_RESERVE_NO_FLUSH);
+ if (pending->error)
goto no_free_objectid;
- }
}
- ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
- objectid, pending->inherit);
- if (ret) {
- pending->error = ret;
+ pending->error = btrfs_qgroup_inherit(trans, fs_info,
+ root->root_key.objectid,
+ objectid, pending->inherit);
+ if (pending->error)
goto no_free_objectid;
- }
key.objectid = objectid;
key.offset = (u64)-1;
@@ -1141,7 +1131,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
dentry->d_name.len, 0);
if (dir_item != NULL && !IS_ERR(dir_item)) {
pending->error = -EEXIST;
- goto fail;
+ goto dir_item_existed;
} else if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
btrfs_abort_transaction(trans, root, ret);
@@ -1179,13 +1169,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
+ if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
+ memset(new_root_item->received_uuid, 0,
+ sizeof(new_root_item->received_uuid));
+ memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
+ memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
+ btrfs_set_root_stransid(new_root_item, 0);
+ btrfs_set_root_rtransid(new_root_item, 0);
+ }
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
- memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
- memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
- btrfs_set_root_stransid(new_root_item, 0);
- btrfs_set_root_rtransid(new_root_item, 0);
old = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
@@ -1272,6 +1266,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
if (ret)
btrfs_abort_transaction(trans, root, ret);
fail:
+ pending->error = ret;
+dir_item_existed:
trans->block_rsv = rsv;
trans->bytes_reserved = 0;
no_free_objectid:
@@ -1287,12 +1283,17 @@ root_item_alloc_fail:
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
- struct btrfs_pending_snapshot *pending;
+ struct btrfs_pending_snapshot *pending, *next;
struct list_head *head = &trans->transaction->pending_snapshots;
+ int ret = 0;
- list_for_each_entry(pending, head, list)
- create_pending_snapshot(trans, fs_info, pending);
- return 0;
+ list_for_each_entry_safe(pending, next, head, list) {
+ list_del(&pending->list);
+ ret = create_pending_snapshot(trans, fs_info, pending);
+ if (ret)
+ break;
+ }
+ return ret;
}
static void update_super_roots(struct btrfs_root *root)
@@ -1448,6 +1449,13 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, err);
spin_lock(&root->fs_info->trans_lock);
+
+ if (list_empty(&cur_trans->list)) {
+ spin_unlock(&root->fs_info->trans_lock);
+ btrfs_end_transaction(trans, root);
+ return;
+ }
+
list_del_init(&cur_trans->list);
if (cur_trans == root->fs_info->running_transaction) {
root->fs_info->trans_no_join = 1;
@@ -1473,6 +1481,10 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
current->journal_info = NULL;
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 0;
+ spin_unlock(&root->fs_info->trans_lock);
}
static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
@@ -1794,7 +1806,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = btrfs_write_and_wait_transaction(trans, root);
if (ret) {
btrfs_error(root->fs_info, ret,
- "Error while writing out transaction.");
+ "Error while writing out transaction");
mutex_unlock(&root->fs_info->tree_log_mutex);
goto cleanup_transaction;
}
@@ -1850,8 +1862,7 @@ cleanup_transaction:
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
- btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
-// WARN_ON(1);
+ btrfs_warn(root->fs_info, "Skipping commit of aborted transaction.");
if (current->journal_info == trans)
current->journal_info = NULL;
cleanup_transaction(trans, root, ret);
@@ -1860,31 +1871,49 @@ cleanup_transaction:
}
/*
- * interface function to delete all the snapshots we have scheduled for deletion
+ * return < 0 if error
+ * 0 if there are no more dead_roots at the time of call
+ * 1 there are more to be processed, call me again
+ *
+ * The return value indicates there are certainly more snapshots to delete, but
+ * if there comes a new one during processing, it may return 0. We don't mind,
+ * because btrfs_commit_super will poke cleaner thread and it will process it a
+ * few seconds later.
*/
-int btrfs_clean_old_snapshots(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
{
- LIST_HEAD(list);
+ int ret;
struct btrfs_fs_info *fs_info = root->fs_info;
+ if (fs_info->sb->s_flags & MS_RDONLY) {
+ pr_debug("btrfs: cleaner called for RO fs!\n");
+ return 0;
+ }
+
spin_lock(&fs_info->trans_lock);
- list_splice_init(&fs_info->dead_roots, &list);
+ if (list_empty(&fs_info->dead_roots)) {
+ spin_unlock(&fs_info->trans_lock);
+ return 0;
+ }
+ root = list_first_entry(&fs_info->dead_roots,
+ struct btrfs_root, root_list);
+ list_del(&root->root_list);
spin_unlock(&fs_info->trans_lock);
- while (!list_empty(&list)) {
- int ret;
-
- root = list_entry(list.next, struct btrfs_root, root_list);
- list_del(&root->root_list);
+ pr_debug("btrfs: cleaner removing %llu\n",
+ (unsigned long long)root->objectid);
- btrfs_kill_all_delayed_nodes(root);
+ btrfs_kill_all_delayed_nodes(root);
- if (btrfs_header_backref_rev(root->node) <
- BTRFS_MIXED_BACKREF_REV)
- ret = btrfs_drop_snapshot(root, NULL, 0, 0);
- else
- ret =btrfs_drop_snapshot(root, NULL, 1, 0);
- BUG_ON(ret < 0);
- }
- return 0;
+ if (btrfs_header_backref_rev(root->node) <
+ BTRFS_MIXED_BACKREF_REV)
+ ret = btrfs_drop_snapshot(root, NULL, 0, 0);
+ else
+ ret = btrfs_drop_snapshot(root, NULL, 1, 0);
+ /*
+ * If we encounter a transaction abort during snapshot cleaning, we
+ * don't want to crash here
+ */
+ BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
+ return 1;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 3c8e0d25c8e4..24c97335a59f 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -123,7 +123,7 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
int btrfs_add_dead_root(struct btrfs_root *root);
int btrfs_defrag_root(struct btrfs_root *root);
-int btrfs_clean_old_snapshots(struct btrfs_root *root);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
@@ -146,5 +146,4 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_blocked(struct btrfs_fs_info *info);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
-void put_transaction(struct btrfs_transaction *transaction);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c7ef569eb22a..c276ac9a0ec3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -277,17 +277,19 @@ static int process_one_buffer(struct btrfs_root *log,
struct extent_buffer *eb,
struct walk_control *wc, u64 gen)
{
+ int ret = 0;
+
if (wc->pin)
- btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
- eb->start, eb->len);
+ ret = btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
+ eb->start, eb->len);
- if (btrfs_buffer_uptodate(eb, gen, 0)) {
+ if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
if (wc->write)
btrfs_write_tree_block(eb);
if (wc->wait)
btrfs_wait_tree_block_writeback(eb);
}
- return 0;
+ return ret;
}
/*
@@ -317,6 +319,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
unsigned long src_ptr;
unsigned long dst_ptr;
int overwrite_root = 0;
+ bool inode_item = key->type == BTRFS_INODE_ITEM_KEY;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
overwrite_root = 1;
@@ -326,6 +329,9 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
/* look for the key in the destination tree */
ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
+ if (ret < 0)
+ return ret;
+
if (ret == 0) {
char *src_copy;
char *dst_copy;
@@ -367,6 +373,30 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
return 0;
}
+ /*
+ * We need to load the old nbytes into the inode so when we
+ * replay the extents we've logged we get the right nbytes.
+ */
+ if (inode_item) {
+ struct btrfs_inode_item *item;
+ u64 nbytes;
+
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ nbytes = btrfs_inode_nbytes(path->nodes[0], item);
+ item = btrfs_item_ptr(eb, slot,
+ struct btrfs_inode_item);
+ btrfs_set_inode_nbytes(eb, item, nbytes);
+ }
+ } else if (inode_item) {
+ struct btrfs_inode_item *item;
+
+ /*
+ * New inode, set nbytes to 0 so that the nbytes comes out
+ * properly when we replay the extents.
+ */
+ item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
+ btrfs_set_inode_nbytes(eb, item, 0);
}
insert:
btrfs_release_path(path);
@@ -380,9 +410,9 @@ insert:
found_size = btrfs_item_size_nr(path->nodes[0],
path->slots[0]);
if (found_size > item_size)
- btrfs_truncate_item(trans, root, path, item_size, 1);
+ btrfs_truncate_item(root, path, item_size, 1);
else if (found_size < item_size)
- btrfs_extend_item(trans, root, path,
+ btrfs_extend_item(root, path,
item_size - found_size);
} else if (ret) {
return ret;
@@ -486,7 +516,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
int found_type;
u64 extent_end;
u64 start = key->offset;
- u64 saved_nbytes;
+ u64 nbytes = 0;
struct btrfs_file_extent_item *item;
struct inode *inode = NULL;
unsigned long size;
@@ -496,10 +526,19 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
found_type = btrfs_file_extent_type(eb, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC)
- extent_end = start + btrfs_file_extent_num_bytes(eb, item);
- else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+ found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ nbytes = btrfs_file_extent_num_bytes(eb, item);
+ extent_end = start + nbytes;
+
+ /*
+ * We don't add to the inodes nbytes if we are prealloc or a
+ * hole.
+ */
+ if (btrfs_file_extent_disk_bytenr(eb, item) == 0)
+ nbytes = 0;
+ } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size = btrfs_file_extent_inline_len(eb, item);
+ nbytes = btrfs_file_extent_ram_bytes(eb, item);
extent_end = ALIGN(start + size, root->sectorsize);
} else {
ret = 0;
@@ -548,10 +587,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
- saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
ret = btrfs_drop_extents(trans, root, inode, start, extent_end, 1);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
@@ -561,7 +600,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_insert_empty_item(trans, root, path, key,
sizeof(*item));
- BUG_ON(ret);
+ if (ret)
+ goto out;
dest_offset = btrfs_item_ptr_offset(path->nodes[0],
path->slots[0]);
copy_extent_buffer(path->nodes[0], eb, dest_offset,
@@ -587,7 +627,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ins.objectid, ins.offset,
0, root->root_key.objectid,
key->objectid, offset, 0);
- BUG_ON(ret);
+ if (ret)
+ goto out;
} else {
/*
* insert the extent pointer in the extent
@@ -596,7 +637,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_alloc_logged_file_extent(trans,
root, root->root_key.objectid,
key->objectid, offset, &ins);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
btrfs_release_path(path);
@@ -613,29 +655,33 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_csums_range(root->log_root,
csum_start, csum_end - 1,
&ordered_sums, 0);
- BUG_ON(ret);
+ if (ret)
+ goto out;
while (!list_empty(&ordered_sums)) {
struct btrfs_ordered_sum *sums;
sums = list_entry(ordered_sums.next,
struct btrfs_ordered_sum,
list);
- ret = btrfs_csum_file_blocks(trans,
+ if (!ret)
+ ret = btrfs_csum_file_blocks(trans,
root->fs_info->csum_root,
sums);
- BUG_ON(ret);
list_del(&sums->list);
kfree(sums);
}
+ if (ret)
+ goto out;
} else {
btrfs_release_path(path);
}
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
/* inline extents are easy, we just overwrite them */
ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
- inode_set_bytes(inode, saved_nbytes);
+ inode_add_bytes(inode, nbytes);
ret = btrfs_update_inode(trans, root, inode);
out:
if (inode)
@@ -677,20 +723,21 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
inode = read_one_inode(root, location.objectid);
if (!inode) {
- kfree(name);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
ret = link_to_fixup_dir(trans, root, path, location.objectid);
- BUG_ON(ret);
+ if (ret)
+ goto out;
ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
+ btrfs_run_delayed_items(trans, root);
+out:
kfree(name);
-
iput(inode);
-
- btrfs_run_delayed_items(trans, root);
return ret;
}
@@ -843,7 +890,8 @@ again:
victim_name_len = btrfs_inode_ref_name_len(leaf,
victim_ref);
victim_name = kmalloc(victim_name_len, GFP_NOFS);
- BUG_ON(!victim_name);
+ if (!victim_name)
+ return -ENOMEM;
read_extent_buffer(leaf, victim_name,
(unsigned long)(victim_ref + 1),
@@ -859,9 +907,10 @@ again:
ret = btrfs_unlink_inode(trans, root, dir,
inode, victim_name,
victim_name_len);
- BUG_ON(ret);
- btrfs_run_delayed_items(trans, root);
kfree(victim_name);
+ if (ret)
+ return ret;
+ btrfs_run_delayed_items(trans, root);
*search_done = 1;
goto again;
}
@@ -869,7 +918,6 @@ again:
ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
}
- BUG_ON(ret);
/*
* NOTE: we have searched root tree and checked the
@@ -903,6 +951,8 @@ again:
goto next;
victim_name = kmalloc(victim_name_len, GFP_NOFS);
+ if (!victim_name)
+ return -ENOMEM;
read_extent_buffer(leaf, victim_name, (unsigned long)&extref->name,
victim_name_len);
@@ -929,14 +979,16 @@ again:
victim_name_len);
btrfs_run_delayed_items(trans, root);
}
- BUG_ON(ret);
iput(victim_parent);
kfree(victim_name);
+ if (ret)
+ return ret;
*search_done = 1;
goto again;
}
kfree(victim_name);
- BUG_ON(ret);
+ if (ret)
+ return ret;
next:
cur_offset += victim_name_len + sizeof(*extref);
}
@@ -949,7 +1001,8 @@ next:
ref_index, name, namelen, 0);
if (di && !IS_ERR(di)) {
ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
btrfs_release_path(path);
@@ -958,7 +1011,8 @@ next:
name, namelen, 0);
if (di && !IS_ERR(di)) {
ret = drop_one_dir_item(trans, root, path, dir, di);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
btrfs_release_path(path);
@@ -1103,15 +1157,19 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
parent_objectid,
ref_index, name, namelen,
&search_done);
- if (ret == 1)
+ if (ret == 1) {
+ ret = 0;
+ goto out;
+ }
+ if (ret)
goto out;
- BUG_ON(ret);
}
/* insert our name */
ret = btrfs_add_link(trans, dir, inode, name, namelen,
0, ref_index);
- BUG_ON(ret);
+ if (ret)
+ goto out;
btrfs_update_inode(trans, root, inode);
}
@@ -1126,13 +1184,11 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
/* finally write the back reference in the inode */
ret = overwrite_item(trans, root, path, eb, slot, key);
- BUG_ON(ret);
-
out:
btrfs_release_path(path);
iput(dir);
iput(inode);
- return 0;
+ return ret;
}
static int insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -1290,10 +1346,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
if (S_ISDIR(inode->i_mode)) {
ret = replay_dir_deletes(trans, root, NULL, path,
ino, 1);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
ret = insert_orphan_item(trans, root, ino);
- BUG_ON(ret);
}
out:
@@ -1338,9 +1394,9 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans,
return -EIO;
ret = fixup_inode_link_count(trans, root, inode);
- BUG_ON(ret);
-
iput(inode);
+ if (ret)
+ goto out;
/*
* fixup on a directory may create new entries,
@@ -1382,12 +1438,15 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (ret == 0) {
- btrfs_inc_nlink(inode);
+ if (!inode->i_nlink)
+ set_nlink(inode, 1);
+ else
+ btrfs_inc_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
} else if (ret == -EEXIST) {
ret = 0;
} else {
- BUG();
+ BUG(); /* Logic Error */
}
iput(inode);
@@ -1456,7 +1515,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct inode *dir;
u8 log_type;
int exists;
- int ret;
+ int ret = 0;
dir = read_one_inode(root, key->objectid);
if (!dir)
@@ -1488,7 +1547,9 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
key->offset, name,
name_len, 1);
} else {
- BUG();
+ /* Corruption */
+ ret = -EINVAL;
+ goto out;
}
if (IS_ERR_OR_NULL(dst_di)) {
/* we need a sequence number to insert, so we only
@@ -1516,7 +1577,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto out;
ret = drop_one_dir_item(trans, root, path, dir, dst_di);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (key->type == BTRFS_DIR_INDEX_KEY)
goto insert;
@@ -1524,14 +1586,15 @@ out:
btrfs_release_path(path);
kfree(name);
iput(dir);
- return 0;
+ return ret;
insert:
btrfs_release_path(path);
ret = insert_one_name(trans, root, path, key->objectid, key->offset,
name, name_len, log_type, &log_key);
-
- BUG_ON(ret && ret != -ENOENT);
+ if (ret && ret != -ENOENT)
+ goto out;
+ ret = 0;
goto out;
}
@@ -1562,7 +1625,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
return -EIO;
name_len = btrfs_dir_name_len(eb, di);
ret = replay_one_name(trans, root, path, eb, di, key);
- BUG_ON(ret);
+ if (ret)
+ return ret;
ptr = (unsigned long)(di + 1);
ptr += name_len;
}
@@ -1723,16 +1787,21 @@ again:
ret = link_to_fixup_dir(trans, root,
path, location.objectid);
- BUG_ON(ret);
+ if (ret) {
+ kfree(name);
+ iput(inode);
+ goto out;
+ }
+
btrfs_inc_nlink(inode);
ret = btrfs_unlink_inode(trans, root, dir, inode,
name, name_len);
- BUG_ON(ret);
-
- btrfs_run_delayed_items(trans, root);
-
+ if (!ret)
+ btrfs_run_delayed_items(trans, root);
kfree(name);
iput(inode);
+ if (ret)
+ goto out;
/* there might still be more names under this key
* check and repeat if required
@@ -1836,7 +1905,8 @@ again:
ret = check_item_in_log(trans, root, log, path,
log_path, dir,
&found_key);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (found_key.offset == (u64)-1)
break;
dir_key.offset = found_key.offset + 1;
@@ -1913,11 +1983,13 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (S_ISDIR(mode)) {
ret = replay_dir_deletes(wc->trans,
root, log, path, key.objectid, 0);
- BUG_ON(ret);
+ if (ret)
+ break;
}
ret = overwrite_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
/* for regular files, make sure corresponding
* orhpan item exist. extents past the new EOF
@@ -1926,12 +1998,14 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (S_ISREG(mode)) {
ret = insert_orphan_item(wc->trans, root,
key.objectid);
- BUG_ON(ret);
+ if (ret)
+ break;
}
ret = link_to_fixup_dir(wc->trans, root,
path, key.objectid);
- BUG_ON(ret);
+ if (ret)
+ break;
}
if (wc->stage < LOG_WALK_REPLAY_ALL)
continue;
@@ -1940,28 +2014,35 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
if (key.type == BTRFS_XATTR_ITEM_KEY) {
ret = overwrite_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
} else if (key.type == BTRFS_INODE_REF_KEY) {
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
- BUG_ON(ret && ret != -ENOENT);
+ if (ret && ret != -ENOENT)
+ break;
+ ret = 0;
} else if (key.type == BTRFS_INODE_EXTREF_KEY) {
ret = add_inode_ref(wc->trans, root, log, path,
eb, i, &key);
- BUG_ON(ret && ret != -ENOENT);
+ if (ret && ret != -ENOENT)
+ break;
+ ret = 0;
} else if (key.type == BTRFS_EXTENT_DATA_KEY) {
ret = replay_one_extent(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
} else if (key.type == BTRFS_DIR_ITEM_KEY ||
key.type == BTRFS_DIR_INDEX_KEY) {
ret = replay_one_dir_item(wc->trans, root, path,
eb, i, &key);
- BUG_ON(ret);
+ if (ret)
+ break;
}
}
btrfs_free_path(path);
- return 0;
+ return ret;
}
static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
@@ -2006,8 +2087,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
if (*level == 1) {
ret = wc->process_func(root, next, wc, ptr_gen);
- if (ret)
+ if (ret) {
+ free_extent_buffer(next);
return ret;
+ }
path->slots[*level]++;
if (wc->free) {
@@ -2027,7 +2110,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_and_pin_reserved_extent(root,
bytenr, blocksize);
- BUG_ON(ret); /* -ENOMEM or logic errors */
+ if (ret) {
+ free_extent_buffer(next);
+ return ret;
+ }
}
free_extent_buffer(next);
continue;
@@ -2100,7 +2186,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
ret = btrfs_free_and_pin_reserved_extent(root,
path->nodes[*level]->start,
path->nodes[*level]->len);
- BUG_ON(ret);
+ if (ret)
+ return ret;
}
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
@@ -2122,7 +2209,6 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
int wret;
int level;
struct btrfs_path *path;
- int i;
int orig_level;
path = btrfs_alloc_path();
@@ -2174,17 +2260,12 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
BTRFS_TREE_LOG_OBJECTID);
ret = btrfs_free_and_pin_reserved_extent(log, next->start,
next->len);
- BUG_ON(ret); /* -ENOMEM or logic errors */
+ if (ret)
+ goto out;
}
}
out:
- for (i = 0; i <= orig_level; i++) {
- if (path->nodes[i]) {
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
- }
btrfs_free_path(path);
return ret;
}
@@ -2468,7 +2549,10 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
if (trans) {
ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
+
+ /* I don't think this can happen but just in case */
+ if (ret)
+ btrfs_abort_transaction(trans, log, ret);
}
while (1) {
@@ -2576,7 +2660,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
btrfs_release_path(path);
di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino,
@@ -2588,7 +2675,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
if (di) {
ret = btrfs_delete_one_dir_name(trans, log, path, di);
bytes_del += name_len;
- BUG_ON(ret);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
/* update the directory size in the log to reflect the names
@@ -2927,7 +3017,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
- BUG_ON(ret == 0);
+ BUG_ON(ret == 0); /* Logic error */
if (ret < 0)
break;
@@ -3130,7 +3220,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
log->fs_info->csum_root,
ds + cs, ds + cs + cl - 1,
&ordered_sums, 0);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_release_path(dst_path);
+ kfree(ins_data);
+ return ret;
+ }
}
}
}
@@ -3170,115 +3264,6 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b)
return 0;
}
-static int drop_adjacent_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct extent_map *em,
- struct btrfs_path *path)
-{
- struct btrfs_file_extent_item *fi;
- struct extent_buffer *leaf;
- struct btrfs_key key, new_key;
- struct btrfs_map_token token;
- u64 extent_end;
- u64 extent_offset = 0;
- int extent_type;
- int del_slot = 0;
- int del_nr = 0;
- int ret = 0;
-
- while (1) {
- btrfs_init_map_token(&token);
- leaf = path->nodes[0];
- path->slots[0]++;
- if (path->slots[0] >= btrfs_header_nritems(leaf)) {
- if (del_nr) {
- ret = btrfs_del_items(trans, root, path,
- del_slot, del_nr);
- if (ret)
- return ret;
- del_nr = 0;
- }
-
- ret = btrfs_next_leaf_write(trans, root, path, 1);
- if (ret < 0)
- return ret;
- if (ret > 0)
- return 0;
- leaf = path->nodes[0];
- }
-
- btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- if (key.objectid != btrfs_ino(inode) ||
- key.type != BTRFS_EXTENT_DATA_KEY ||
- key.offset >= em->start + em->len)
- break;
-
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- extent_type = btrfs_token_file_extent_type(leaf, fi, &token);
- if (extent_type == BTRFS_FILE_EXTENT_REG ||
- extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
- extent_offset = btrfs_token_file_extent_offset(leaf,
- fi, &token);
- extent_end = key.offset +
- btrfs_token_file_extent_num_bytes(leaf, fi,
- &token);
- } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
- extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
- } else {
- BUG();
- }
-
- if (extent_end <= em->len + em->start) {
- if (!del_nr) {
- del_slot = path->slots[0];
- }
- del_nr++;
- continue;
- }
-
- /*
- * Ok so we'll ignore previous items if we log a new extent,
- * which can lead to overlapping extents, so if we have an
- * existing extent we want to adjust we _have_ to check the next
- * guy to make sure we even need this extent anymore, this keeps
- * us from panicing in set_item_key_safe.
- */
- if (path->slots[0] < btrfs_header_nritems(leaf) - 1) {
- struct btrfs_key tmp_key;
-
- btrfs_item_key_to_cpu(leaf, &tmp_key,
- path->slots[0] + 1);
- if (tmp_key.objectid == btrfs_ino(inode) &&
- tmp_key.type == BTRFS_EXTENT_DATA_KEY &&
- tmp_key.offset <= em->start + em->len) {
- if (!del_nr)
- del_slot = path->slots[0];
- del_nr++;
- continue;
- }
- }
-
- BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
- memcpy(&new_key, &key, sizeof(new_key));
- new_key.offset = em->start + em->len;
- btrfs_set_item_key_safe(trans, root, path, &new_key);
- extent_offset += em->start + em->len - key.offset;
- btrfs_set_token_file_extent_offset(leaf, fi, extent_offset,
- &token);
- btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end -
- (em->start + em->len),
- &token);
- btrfs_mark_buffer_dirty(leaf);
- }
-
- if (del_nr)
- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
-
- return ret;
-}
-
static int log_one_extent(struct btrfs_trans_handle *trans,
struct inode *inode, struct btrfs_root *root,
struct extent_map *em, struct btrfs_path *path)
@@ -3300,39 +3285,24 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
int index = log->log_transid % 2;
bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-insert:
+ ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
+ em->start + em->len, NULL, 0);
+ if (ret)
+ return ret;
+
INIT_LIST_HEAD(&ordered_sums);
btrfs_init_map_token(&token);
key.objectid = btrfs_ino(inode);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = em->start;
- path->really_keep_locks = 1;
ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi));
- if (ret && ret != -EEXIST) {
- path->really_keep_locks = 0;
+ if (ret)
return ret;
- }
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- /*
- * If we are overwriting an inline extent with a real one then we need
- * to just delete the inline extent as it may not be large enough to
- * have the entire file_extent_item.
- */
- if (ret && btrfs_token_file_extent_type(leaf, fi, &token) ==
- BTRFS_FILE_EXTENT_INLINE) {
- ret = btrfs_del_item(trans, log, path);
- btrfs_release_path(path);
- if (ret) {
- path->really_keep_locks = 0;
- return ret;
- }
- goto insert;
- }
-
btrfs_set_token_file_extent_generation(leaf, fi, em->generation,
&token);
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3371,22 +3341,14 @@ insert:
em->start - em->orig_start,
&token);
btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token);
- btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token);
+ btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->ram_bytes, &token);
btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type,
&token);
btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token);
btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token);
btrfs_mark_buffer_dirty(leaf);
- /*
- * Have to check the extent to the right of us to make sure it doesn't
- * fall in our current range. We're ok if the previous extent is in our
- * range since the recovery stuff will run us in key order and thus just
- * drop the part we overwrote.
- */
- ret = drop_adjacent_extents(trans, log, inode, em, path);
btrfs_release_path(path);
- path->really_keep_locks = 0;
if (ret) {
return ret;
}
@@ -3611,8 +3573,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
bool fast_search = false;
u64 ino = btrfs_ino(inode);
- log = root->log_root;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -3879,9 +3839,9 @@ out:
* only logging is done of any parent directories that are older than
* the last committed transaction
*/
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only)
+static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode,
+ struct dentry *parent, int exists_only)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
@@ -4072,6 +4032,9 @@ again:
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
+ free_extent_buffer(log->node);
+ free_extent_buffer(log->commit_root);
+ kfree(log);
btrfs_error(fs_info, ret, "Couldn't read target root "
"for tree log recovery.");
goto error;
@@ -4080,12 +4043,10 @@ again:
wc.replay_dest->log_root = log;
btrfs_record_root_in_trans(trans, wc.replay_dest);
ret = walk_log_tree(trans, log, &wc);
- BUG_ON(ret);
- if (wc.stage == LOG_WALK_REPLAY_ALL) {
+ if (!ret && wc.stage == LOG_WALK_REPLAY_ALL) {
ret = fixup_inode_link_counts(trans, wc.replay_dest,
path);
- BUG_ON(ret);
}
key.offset = found_key.offset - 1;
@@ -4094,6 +4055,9 @@ again:
free_extent_buffer(log->commit_root);
kfree(log);
+ if (ret)
+ goto error;
+
if (found_key.offset == 0)
break;
}
@@ -4114,17 +4078,20 @@ again:
btrfs_free_path(path);
+ /* step 4: commit the transaction, which also unpins the blocks */
+ ret = btrfs_commit_transaction(trans, fs_info->tree_root);
+ if (ret)
+ return ret;
+
free_extent_buffer(log_root_tree->node);
log_root_tree->log_root = NULL;
fs_info->log_root_recovering = 0;
-
- /* step 4: commit the transaction, which also unpins the blocks */
- btrfs_commit_transaction(trans, fs_info->tree_root);
-
kfree(log_root_tree);
- return 0;
+ return 0;
error:
+ if (wc.trans)
+ btrfs_end_transaction(wc.trans, fs_info->tree_root);
btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 862ac813f6b8..1d4ae0d15a70 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -40,9 +40,6 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans,
struct inode *inode, u64 dirid);
void btrfs_end_log_trans(struct btrfs_root *root);
int btrfs_pin_log_trans(struct btrfs_root *root);
-int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
int for_rename);
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index ddc61cad0080..7b417e20efe2 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -53,6 +53,7 @@ void ulist_init(struct ulist *ulist)
ulist->nnodes = 0;
ulist->nodes = ulist->int_nodes;
ulist->nodes_alloced = ULIST_SIZE;
+ ulist->root = RB_ROOT;
}
EXPORT_SYMBOL(ulist_init);
@@ -72,6 +73,7 @@ void ulist_fini(struct ulist *ulist)
if (ulist->nodes_alloced > ULIST_SIZE)
kfree(ulist->nodes);
ulist->nodes_alloced = 0; /* in case ulist_fini is called twice */
+ ulist->root = RB_ROOT;
}
EXPORT_SYMBOL(ulist_fini);
@@ -123,6 +125,45 @@ void ulist_free(struct ulist *ulist)
}
EXPORT_SYMBOL(ulist_free);
+static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
+{
+ struct rb_node *n = ulist->root.rb_node;
+ struct ulist_node *u = NULL;
+
+ while (n) {
+ u = rb_entry(n, struct ulist_node, rb_node);
+ if (u->val < val)
+ n = n->rb_right;
+ else if (u->val > val)
+ n = n->rb_left;
+ else
+ return u;
+ }
+ return NULL;
+}
+
+static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
+{
+ struct rb_node **p = &ulist->root.rb_node;
+ struct rb_node *parent = NULL;
+ struct ulist_node *cur = NULL;
+
+ while (*p) {
+ parent = *p;
+ cur = rb_entry(parent, struct ulist_node, rb_node);
+
+ if (cur->val < ins->val)
+ p = &(*p)->rb_right;
+ else if (cur->val > ins->val)
+ p = &(*p)->rb_left;
+ else
+ return -EEXIST;
+ }
+ rb_link_node(&ins->rb_node, parent, p);
+ rb_insert_color(&ins->rb_node, &ulist->root);
+ return 0;
+}
+
/**
* ulist_add - add an element to the ulist
* @ulist: ulist to add the element to
@@ -151,14 +192,13 @@ int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask)
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 *old_aux, gfp_t gfp_mask)
{
- int i;
-
- for (i = 0; i < ulist->nnodes; ++i) {
- if (ulist->nodes[i].val == val) {
- if (old_aux)
- *old_aux = ulist->nodes[i].aux;
- return 0;
- }
+ int ret = 0;
+ struct ulist_node *node = NULL;
+ node = ulist_rbtree_search(ulist, val);
+ if (node) {
+ if (old_aux)
+ *old_aux = node->aux;
+ return 0;
}
if (ulist->nnodes >= ulist->nodes_alloced) {
@@ -187,6 +227,8 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
}
ulist->nodes[ulist->nnodes].val = val;
ulist->nodes[ulist->nnodes].aux = aux;
+ ret = ulist_rbtree_insert(ulist, &ulist->nodes[ulist->nnodes]);
+ BUG_ON(ret);
++ulist->nnodes;
return 1;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 21a1963439c3..fb36731074b5 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -8,6 +8,9 @@
#ifndef __ULIST__
#define __ULIST__
+#include <linux/list.h>
+#include <linux/rbtree.h>
+
/*
* ulist is a generic data structure to hold a collection of unique u64
* values. The only operations it supports is adding to the list and
@@ -34,6 +37,7 @@ struct ulist_iterator {
struct ulist_node {
u64 val; /* value to store */
u64 aux; /* auxiliary value saved along with the val */
+ struct rb_node rb_node; /* used to speed up search */
};
struct ulist {
@@ -54,6 +58,8 @@ struct ulist {
*/
struct ulist_node *nodes;
+ struct rb_root root;
+
/*
* inline storage space for the first ULIST_SIZE entries
*/
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 35bb2d4ed29f..8bffb9174afb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -46,6 +46,7 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_device *device);
static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
+static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static DEFINE_MUTEX(uuid_mutex);
@@ -684,6 +685,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
__btrfs_close_devices(fs_devices);
free_fs_devices(fs_devices);
}
+ /*
+ * Wait for rcu kworkers under __btrfs_close_devices
+ * to finish all blkdev_puts so device is really
+ * free when umount is done.
+ */
+ rcu_barrier();
return ret;
}
@@ -711,9 +718,9 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
if (!device->name)
continue;
- ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
- &bdev, &bh);
- if (ret)
+ /* Just open everything we can; ignore failures here */
+ if (btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+ &bdev, &bh))
continue;
disk_super = (struct btrfs_super_block *)bh->b_data;
@@ -1193,10 +1200,10 @@ out:
return ret;
}
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset, u64 start, u64 num_bytes)
+static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ u64 chunk_tree, u64 chunk_objectid,
+ u64 chunk_offset, u64 start, u64 num_bytes)
{
int ret;
struct btrfs_path *path;
@@ -1323,9 +1330,9 @@ error:
* the device information is stored in the chunk root
* the btrfs_device struct should be fully filled in
*/
-int btrfs_add_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device)
+static int btrfs_add_device(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_device *device)
{
int ret;
struct btrfs_path *path;
@@ -1704,8 +1711,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
}
-int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
- struct btrfs_device **device)
+static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+ struct btrfs_device **device)
{
int ret = 0;
struct btrfs_super_block *disk_super;
@@ -2379,7 +2386,11 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
return ret;
trans = btrfs_start_transaction(root, 0);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ btrfs_std_error(root->fs_info, ret);
+ return ret;
+ }
lock_chunks(root);
@@ -3050,7 +3061,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
unset_balance_control(fs_info);
ret = del_balance_item(fs_info->tree_root);
- BUG_ON(ret);
+ if (ret)
+ btrfs_std_error(fs_info, ret);
atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
}
@@ -3108,14 +3120,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
- else if (num_devices < 4)
+ else if (num_devices > 1)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
- else
- allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10 |
- BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6);
-
+ if (num_devices > 2)
+ allowed |= BTRFS_BLOCK_GROUP_RAID5;
+ if (num_devices > 3)
+ allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_RAID6);
if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
(!alloc_profile_is_valid(bctl->data.target, 1) ||
(bctl->data.target & ~allowed))) {
@@ -3230,6 +3241,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
update_ioctl_balance_args(fs_info, 0, bargs);
}
+ if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+ balance_need_close(fs_info)) {
+ __cancel_balance(fs_info);
+ }
+
wake_up(&fs_info->balance_wait_q);
return ret;
@@ -3591,7 +3607,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
return 0;
}
-struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = {
.sub_stripes = 2,
.dev_stripes = 1,
@@ -3658,18 +3674,10 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
{
- u64 features;
-
if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
return;
- features = btrfs_super_incompat_flags(info->super_copy);
- if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
- return;
-
- features |= BTRFS_FEATURE_INCOMPAT_RAID56;
- btrfs_set_super_incompat_flags(info->super_copy, features);
- printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
+ btrfs_set_fs_incompat(info, RAID56);
}
static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
@@ -3916,7 +3924,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
em_tree = &extent_root->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret) {
free_extent_map(em);
@@ -4224,9 +4232,25 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, len);
read_unlock(&em_tree->lock);
- BUG_ON(!em);
- BUG_ON(em->start > logical || em->start + em->len < logical);
+ /*
+ * We could return errors for these cases, but that could get ugly and
+ * we'd probably do the same thing which is just not do anything else
+ * and exit, so return 1 so the callers don't try to use other copies.
+ */
+ if (!em) {
+ btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+ logical+len);
+ return 1;
+ }
+
+ if (em->start > logical || em->start + em->len < logical) {
+ btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+ "%Lu-%Lu\n", logical, logical+len, em->start,
+ em->start + em->len);
+ return 1;
+ }
+
map = (struct map_lookup *)em->bdev;
if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
ret = map->num_stripes;
@@ -4395,13 +4419,19 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
read_unlock(&em_tree->lock);
if (!em) {
- printk(KERN_CRIT "btrfs: unable to find logical %llu len %llu\n",
- (unsigned long long)logical,
- (unsigned long long)*length);
- BUG();
+ btrfs_crit(fs_info, "unable to find logical %llu len %llu",
+ (unsigned long long)logical,
+ (unsigned long long)*length);
+ return -EINVAL;
+ }
+
+ if (em->start > logical || em->start + em->len < logical) {
+ btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
+ "found %Lu-%Lu\n", logical, em->start,
+ em->start + em->len);
+ return -EINVAL;
}
- BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev;
offset = logical - em->start;
@@ -4919,7 +4949,18 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
em = lookup_extent_mapping(em_tree, chunk_start, 1);
read_unlock(&em_tree->lock);
- BUG_ON(!em || em->start != chunk_start);
+ if (!em) {
+ printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n",
+ chunk_start);
+ return -EIO;
+ }
+
+ if (em->start != chunk_start) {
+ printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n",
+ em->start, chunk_start);
+ free_extent_map(em);
+ return -EIO;
+ }
map = (struct map_lookup *)em->bdev;
length = em->len;
@@ -4977,42 +5018,16 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
return 0;
}
-static void *merge_stripe_index_into_bio_private(void *bi_private,
- unsigned int stripe_index)
-{
- /*
- * with single, dup, RAID0, RAID1 and RAID10, stripe_index is
- * at most 1.
- * The alternative solution (instead of stealing bits from the
- * pointer) would be to allocate an intermediate structure
- * that contains the old private pointer plus the stripe_index.
- */
- BUG_ON((((uintptr_t)bi_private) & 3) != 0);
- BUG_ON(stripe_index > 3);
- return (void *)(((uintptr_t)bi_private) | stripe_index);
-}
-
-static struct btrfs_bio *extract_bbio_from_bio_private(void *bi_private)
-{
- return (struct btrfs_bio *)(((uintptr_t)bi_private) & ~((uintptr_t)3));
-}
-
-static unsigned int extract_stripe_index_from_bio_private(void *bi_private)
-{
- return (unsigned int)((uintptr_t)bi_private) & 3;
-}
-
static void btrfs_end_bio(struct bio *bio, int err)
{
- struct btrfs_bio *bbio = extract_bbio_from_bio_private(bio->bi_private);
+ struct btrfs_bio *bbio = bio->bi_private;
int is_orig_bio = 0;
if (err) {
atomic_inc(&bbio->error);
if (err == -EIO || err == -EREMOTEIO) {
unsigned int stripe_index =
- extract_stripe_index_from_bio_private(
- bio->bi_private);
+ btrfs_io_bio(bio)->stripe_index;
struct btrfs_device *dev;
BUG_ON(stripe_index >= bbio->num_stripes);
@@ -5042,8 +5057,7 @@ static void btrfs_end_bio(struct bio *bio, int err)
}
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
/* only send an error to the higher layers if it is
* beyond the tolerance of the btrfs bio
*/
@@ -5079,9 +5093,9 @@ struct async_sched {
* This will add one bio to the pending list for a device and make sure
* the work struct is scheduled.
*/
-noinline void btrfs_schedule_bio(struct btrfs_root *root,
- struct btrfs_device *device,
- int rw, struct bio *bio)
+static noinline void btrfs_schedule_bio(struct btrfs_root *root,
+ struct btrfs_device *device,
+ int rw, struct bio *bio)
{
int should_queue = 1;
struct btrfs_pending_bios *pending_bios;
@@ -5150,7 +5164,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio,
}
prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
- if ((bio->bi_size >> 9) > max_sectors)
+ if (bio_sectors(bio) > max_sectors)
return 0;
if (!q->merge_bvec_fn)
@@ -5169,8 +5183,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
bio->bi_private = bbio;
- bio->bi_private = merge_stripe_index_into_bio_private(
- bio->bi_private, (unsigned int)dev_nr);
+ btrfs_io_bio(bio)->stripe_index = dev_nr;
bio->bi_end_io = btrfs_end_bio;
bio->bi_sector = physical >> 9;
#ifdef DEBUG
@@ -5231,8 +5244,7 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
if (atomic_dec_and_test(&bbio->stripes_pending)) {
bio->bi_private = bbio->private;
bio->bi_end_io = bbio->end_io;
- bio->bi_bdev = (struct block_device *)
- (unsigned long)bbio->mirror_num;
+ btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
bio->bi_sector = logical >> 9;
kfree(bbio);
bio_endio(bio, -EIO);
@@ -5281,10 +5293,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
if (map_length < length) {
- printk(KERN_CRIT "btrfs: mapping failed logical %llu bio len %llu "
- "len %llu\n", (unsigned long long)logical,
- (unsigned long long)length,
- (unsigned long long)map_length);
+ btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
+ (unsigned long long)logical,
+ (unsigned long long)length,
+ (unsigned long long)map_length);
BUG();
}
@@ -5310,7 +5322,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
}
if (dev_nr < total_devs - 1) {
- bio = bio_clone(first_bio, GFP_NOFS);
+ bio = btrfs_bio_clone(first_bio, GFP_NOFS);
BUG_ON(!bio); /* -ENOMEM */
} else {
bio = first_bio;
@@ -5449,7 +5461,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
}
write_lock(&map_tree->map_tree.lock);
- ret = add_extent_mapping(&map_tree->map_tree, em);
+ ret = add_extent_mapping(&map_tree->map_tree, em, 0);
write_unlock(&map_tree->map_tree.lock);
BUG_ON(ret); /* Tree corruption */
free_extent_map(em);
@@ -5556,8 +5568,8 @@ static int read_one_dev(struct btrfs_root *root,
return -EIO;
if (!device) {
- printk(KERN_WARNING "warning devid %llu missing\n",
- (unsigned long long)devid);
+ btrfs_warn(root->fs_info, "devid %llu missing",
+ (unsigned long long)devid);
device = add_missing_dev(root, devid, dev_uuid);
if (!device)
return -ENOMEM;
@@ -5899,7 +5911,7 @@ void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
btrfs_dev_stat_print_on_error(dev);
}
-void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
+static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
{
if (!dev->dev_stats_valid)
return;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 062d8604d35b..f6247e2a47f7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -152,6 +152,26 @@ struct btrfs_fs_devices {
int rotating;
};
+/*
+ * we need the mirror number and stripe index to be passed around
+ * the call chain while we are processing end_io (especially errors).
+ * Really, what we need is a btrfs_bio structure that has this info
+ * and is properly sized with its stripe array, but we're not there
+ * quite yet. We have our own btrfs bioset, and all of the bios
+ * we allocate are actually btrfs_io_bios. We'll cram as much of
+ * struct btrfs_bio as we can into this over time.
+ */
+struct btrfs_io_bio {
+ unsigned long mirror_num;
+ unsigned long stripe_index;
+ struct bio bio;
+};
+
+static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)
+{
+ return container_of(bio, struct btrfs_io_bio, bio);
+}
+
struct btrfs_bio_stripe {
struct btrfs_device *dev;
u64 physical;
@@ -254,10 +274,6 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
#define btrfs_bio_size(n) (sizeof(struct btrfs_bio) + \
(sizeof(struct btrfs_bio_stripe) * (n)))
-int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
- struct btrfs_device *device,
- u64 chunk_tree, u64 chunk_objectid,
- u64 chunk_offset, u64 start, u64 num_bytes);
int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
u64 logical, u64 *length,
struct btrfs_bio **bbio_ret, int mirror_num);
@@ -282,11 +298,6 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
-int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
- struct btrfs_device **device);
-int btrfs_add_device(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_root *root, char *device_path);
void btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -307,7 +318,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);
-void btrfs_dev_stat_print_on_error(struct btrfs_device *device);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_root *root,
struct btrfs_ioctl_get_dev_stats *stats);
@@ -321,9 +331,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
struct btrfs_device *tgtdev);
int btrfs_scratch_superblock(struct btrfs_device *device);
-void btrfs_schedule_bio(struct btrfs_root *root,
- struct btrfs_device *device,
- int rw, struct bio *bio);
int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree,
u64 logical, u64 len, int mirror_num);
unsigned long btrfs_full_stripe_len(struct btrfs_root *root,
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 446a6848c554..05740b9789e4 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -406,8 +406,8 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
XATTR_REPLACE);
}
-int btrfs_initxattrs(struct inode *inode, const struct xattr *xattr_array,
- void *fs_info)
+static int btrfs_initxattrs(struct inode *inode,
+ const struct xattr *xattr_array, void *fs_info)
{
const struct xattr *xattr;
struct btrfs_trans_handle *trans = fs_info;