summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1654
1 files changed, 1266 insertions, 388 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95542a1b3dfc..ca1b767d51f7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,13 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/mount.h>
+#include <linux/btrfs.h>
+#include <linux/blkdev.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "ioctl.h"
#include "print-tree.h"
#include "ordered-data.h"
#include "xattr.h"
@@ -54,6 +55,7 @@
#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
+#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@@ -71,6 +73,7 @@ static const struct file_operations btrfs_dir_file_operations;
static struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
+static struct kmem_cache *btrfs_delalloc_work_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
@@ -87,13 +90,17 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
};
-static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct inode *inode);
static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+ u64 len, u64 orig_start,
+ u64 block_start, u64 block_len,
+ u64 orig_block_len, int type);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
@@ -226,8 +233,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
u64 isize = i_size_read(inode);
u64 actual_end = min(end + 1, isize);
u64 inline_len = actual_end - start;
- u64 aligned_end = (end + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
+ u64 aligned_end = ALIGN(end, root->sectorsize);
u64 data_len = inline_len;
int ret;
@@ -260,6 +266,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
return 0;
@@ -384,7 +391,7 @@ again:
* a compressed extent to 128k.
*/
total_compressed = min(total_compressed, max_uncompressed);
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
+ num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
total_in = 0;
ret = 0;
@@ -483,15 +490,13 @@ cont:
* up to a block size boundary so the allocator does sane
* things
*/
- total_compressed = (total_compressed + blocksize - 1) &
- ~(blocksize - 1);
+ total_compressed = ALIGN(total_compressed, blocksize);
/*
* one last check to make sure the compression is really a
* win, compare the page count read with the blocks on disk
*/
- total_in = (total_in + PAGE_CACHE_SIZE - 1) &
- ~(PAGE_CACHE_SIZE - 1);
+ total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
if (total_compressed >= total_in) {
will_compress = 0;
} else {
@@ -603,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (list_empty(&async_cow->extents))
return 0;
-
+again:
while (!list_empty(&async_cow->extents)) {
async_extent = list_entry(async_cow->extents.next,
struct async_extent, list);
@@ -643,6 +648,8 @@ retry:
async_extent->ram_size - 1,
btrfs_get_extent,
WB_SYNC_ALL);
+ else if (ret)
+ unlock_page(async_cow->locked_page);
kfree(async_extent);
cond_resched();
continue;
@@ -667,6 +674,7 @@ retry:
if (ret) {
int i;
+
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
page_cache_release(async_extent->pages[i]);
@@ -674,12 +682,10 @@ retry:
kfree(async_extent->pages);
async_extent->nr_pages = 0;
async_extent->pages = NULL;
- unlock_extent(io_tree, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1);
+
if (ret == -ENOSPC)
goto retry;
- goto out_free; /* JDM: Requeue? */
+ goto out_free;
}
/*
@@ -691,21 +697,29 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map();
- BUG_ON(!em); /* -ENOMEM */
+ if (!em)
+ goto out_free_reserve;
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -716,6 +730,9 @@ retry:
async_extent->ram_size - 1, 0);
}
+ if (ret)
+ goto out_free_reserve;
+
ret = btrfs_add_ordered_extent_compress(inode,
async_extent->start,
ins.objectid,
@@ -723,7 +740,8 @@ retry:
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_free_reserve;
/*
* clear dirty, set writeback and unlock the pages.
@@ -744,18 +762,30 @@ retry:
ins.objectid,
ins.offset, async_extent->pages,
async_extent->nr_pages);
-
- BUG_ON(ret); /* -ENOMEM */
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
+ if (ret)
+ goto out;
cond_resched();
}
ret = 0;
out:
return ret;
+out_free_reserve:
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
out_free:
+ extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1,
+ NULL, EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
kfree(async_extent);
- goto out;
+ goto again;
}
static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -803,14 +833,14 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
* required to start IO on it. It may be clean and already done with
* IO when we return.
*/
-static noinline int cow_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
+static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct btrfs_root *root,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written,
+ int unlock)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
@@ -823,25 +853,10 @@ static noinline int cow_file_range(struct inode *inode,
int ret = 0;
BUG_ON(btrfs_is_free_space_inode(inode));
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- return PTR_ERR(trans);
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- num_bytes = (end - start + blocksize) & ~(blocksize - 1);
+ num_bytes = ALIGN(end - start + 1, blocksize);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
- ret = 0;
/* if this is a small write inside eof, kick off defrag */
if (num_bytes < 64 * 1024 &&
@@ -897,15 +912,22 @@ static noinline int cow_file_range(struct inode *inode,
em->orig_start = em->start;
ram_size = ins.offset;
em->len = ins.offset;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -952,11 +974,9 @@ static noinline int cow_file_range(struct inode *inode,
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
- ret = 0;
out:
- btrfs_end_transaction(trans, root);
-
return ret;
+
out_unlock:
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
@@ -971,6 +991,39 @@ out_unlock:
goto out;
}
+static noinline int cow_file_range(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written,
+ int unlock)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ extent_clear_unlock_delalloc(inode,
+ &BTRFS_I(inode)->io_tree,
+ start, end, locked_page,
+ EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
+ return PTR_ERR(trans);
+ }
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+ ret = __cow_file_range(trans, inode, root, locked_page, start, end,
+ page_started, nr_written, unlock);
+
+ btrfs_end_transaction(trans, root);
+
+ return ret;
+}
+
/*
* work queue call back to started compression on a file and pages
*/
@@ -1126,6 +1179,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 extent_offset;
u64 disk_bytenr;
u64 num_bytes;
+ u64 disk_num_bytes;
int extent_type;
int ret, err;
int type;
@@ -1228,6 +1282,8 @@ next_slot:
extent_offset = btrfs_file_extent_offset(leaf, fi);
extent_end = found_key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
+ disk_num_bytes =
+ btrfs_file_extent_disk_num_bytes(leaf, fi);
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
@@ -1281,9 +1337,9 @@ out_check:
btrfs_release_path(path);
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start,
- found_key.offset - 1, page_started,
- nr_written, 1);
+ ret = __cow_file_range(trans, inode, root, locked_page,
+ cow_start, found_key.offset - 1,
+ page_started, nr_written, 1);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error;
@@ -1298,16 +1354,23 @@ out_check:
em = alloc_extent_map();
BUG_ON(!em); /* -ENOMEM */
em->start = cur_offset;
- em->orig_start = em->start;
+ em->orig_start = found_key.offset - extent_offset;
em->len = num_bytes;
em->block_len = num_bytes;
em->block_start = disk_bytenr;
+ em->orig_block_len = disk_num_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->mod_start = em->start;
+ em->mod_len = em->len;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ set_bit(EXTENT_FLAG_FILLING, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -1352,8 +1415,9 @@ out_check:
}
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = __cow_file_range(trans, inode, root, locked_page,
+ cow_start, end,
+ page_started, nr_written, 1);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error;
@@ -1468,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
spin_unlock(&BTRFS_I(inode)->lock);
}
- spin_lock(&root->fs_info->delalloc_lock);
+ __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
+ root->fs_info->delalloc_batch);
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes += len;
- root->fs_info->delalloc_bytes += len;
- if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
- &root->fs_info->delalloc_inodes);
+ if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
+ &root->fs_info->delalloc_inodes);
+ set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+ spin_unlock(&root->fs_info->delalloc_lock);
}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -1510,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
&& do_list)
btrfs_free_reserved_data_space(inode, len);
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->delalloc_bytes -= len;
+ __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
+ root->fs_info->delalloc_batch);
+ spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->delalloc_bytes -= len;
-
if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
- !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
- list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+ test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ list_del_init(&BTRFS_I(inode)->delalloc_inodes);
+ clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &BTRFS_I(inode)->runtime_flags);
+ }
+ spin_unlock(&root->fs_info->delalloc_lock);
}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -1526,12 +1605,11 @@ static void btrfs_clear_bit_hook(struct inode *inode,
* extent_io.c merge_bio_hook, this must check the chunk tree to make sure
* we don't create bios that span stripes or chunks
*/
-int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
+int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
size_t size, struct bio *bio,
unsigned long bio_flags)
{
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- struct btrfs_mapping_tree *map_tree;
u64 logical = (u64)bio->bi_sector << 9;
u64 length = 0;
u64 map_length;
@@ -1541,11 +1619,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
return 0;
length = bio->bi_size;
- map_tree = &root->fs_info->mapping_tree;
map_length = length;
- ret = btrfs_map_block(map_tree, READ, logical,
+ ret = btrfs_map_block(root->fs_info, rw, logical,
&map_length, NULL, 0);
- /* Will always return 0 or 1 with map_multi == NULL */
+ /* Will always return 0 with map_multi == NULL */
BUG_ON(ret < 0);
if (map_length < length + size)
return 1;
@@ -1586,7 +1663,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ int ret;
+
+ ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ if (ret)
+ bio_endio(bio, ret);
+ return ret;
}
/*
@@ -1601,6 +1683,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int ret = 0;
int skip_sum;
int metadata = 0;
+ int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -1610,31 +1693,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (!(rw & REQ_WRITE)) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
- return ret;
+ goto out;
if (bio_flags & EXTENT_BIO_COMPRESSED) {
- return btrfs_submit_compressed_read(inode, bio,
- mirror_num, bio_flags);
+ ret = btrfs_submit_compressed_read(inode, bio,
+ mirror_num,
+ bio_flags);
+ goto out;
} else if (!skip_sum) {
ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
if (ret)
- return ret;
+ goto out;
}
goto mapit;
- } else if (!skip_sum) {
+ } else if (async && !skip_sum) {
/* csum items have already been cloned */
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
/* we're doing a write, do the async checksumming */
- return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num,
bio_flags, bio_offset,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
+ goto out;
+ } else if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+ if (ret)
+ goto out;
}
mapit:
- return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+
+out:
+ if (ret < 0)
+ bio_endio(bio, ret);
+ return ret;
}
/*
@@ -1657,8 +1752,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state)
{
- if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
- WARN_ON(1);
+ WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
cached_state, GFP_NOFS);
}
@@ -1837,6 +1931,643 @@ out:
return ret;
}
+/* snapshot-aware defrag */
+struct sa_defrag_extent_backref {
+ struct rb_node node;
+ struct old_sa_defrag_extent *old;
+ u64 root_id;
+ u64 inum;
+ u64 file_pos;
+ u64 extent_offset;
+ u64 num_bytes;
+ u64 generation;
+};
+
+struct old_sa_defrag_extent {
+ struct list_head list;
+ struct new_sa_defrag_extent *new;
+
+ u64 extent_offset;
+ u64 bytenr;
+ u64 offset;
+ u64 len;
+ int count;
+};
+
+struct new_sa_defrag_extent {
+ struct rb_root root;
+ struct list_head head;
+ struct btrfs_path *path;
+ struct inode *inode;
+ u64 file_pos;
+ u64 len;
+ u64 bytenr;
+ u64 disk_len;
+ u8 compress_type;
+};
+
+static int backref_comp(struct sa_defrag_extent_backref *b1,
+ struct sa_defrag_extent_backref *b2)
+{
+ if (b1->root_id < b2->root_id)
+ return -1;
+ else if (b1->root_id > b2->root_id)
+ return 1;
+
+ if (b1->inum < b2->inum)
+ return -1;
+ else if (b1->inum > b2->inum)
+ return 1;
+
+ if (b1->file_pos < b2->file_pos)
+ return -1;
+ else if (b1->file_pos > b2->file_pos)
+ return 1;
+
+ /*
+ * [------------------------------] ===> (a range of space)
+ * |<--->| |<---->| =============> (fs/file tree A)
+ * |<---------------------------->| ===> (fs/file tree B)
+ *
+ * A range of space can refer to two file extents in one tree while
+ * refer to only one file extent in another tree.
+ *
+ * So we may process a disk offset more than one time(two extents in A)
+ * and locate at the same extent(one extent in B), then insert two same
+ * backrefs(both refer to the extent in B).
+ */
+ return 0;
+}
+
+static void backref_insert(struct rb_root *root,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sa_defrag_extent_backref *entry;
+ int ret;
+
+ while (*p) {
+ parent = *p;
+ entry = rb_entry(parent, struct sa_defrag_extent_backref, node);
+
+ ret = backref_comp(backref, entry);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&backref->node, parent, p);
+ rb_insert_color(&backref->node, root);
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
+ void *ctx)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_fs_info *fs_info;
+ struct old_sa_defrag_extent *old = ctx;
+ struct new_sa_defrag_extent *new = old->new;
+ struct btrfs_path *path = new->path;
+ struct btrfs_key key;
+ struct btrfs_root *root;
+ struct sa_defrag_extent_backref *backref;
+ struct extent_buffer *leaf;
+ struct inode *inode = new->inode;
+ int slot;
+ int ret;
+ u64 extent_offset;
+ u64 num_bytes;
+
+ if (BTRFS_I(inode)->root->root_key.objectid == root_id &&
+ inum == btrfs_ino(inode))
+ return 0;
+
+ key.objectid = root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ WARN_ON(1);
+ pr_debug("inum=%llu, offset=%llu, root_id=%llu\n",
+ inum, offset, root_id);
+ return PTR_ERR(root);
+ }
+
+ key.objectid = inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ if (offset > (u64)-1 << 32)
+ key.offset = 0;
+ else
+ key.offset = offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ WARN_ON(1);
+ return ret;
+ }
+
+ while (1) {
+ cond_resched();
+
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0) {
+ goto out;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ continue;
+ }
+
+ path->slots[0]++;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+
+ if (key.objectid > inum)
+ goto out;
+
+ if (key.objectid < inum || key.type != BTRFS_EXTENT_DATA_KEY)
+ continue;
+
+ extent = btrfs_item_ptr(leaf, slot,
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
+ continue;
+
+ extent_offset = btrfs_file_extent_offset(leaf, extent);
+ if (key.offset - extent_offset != offset)
+ continue;
+
+ num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+ if (extent_offset >= old->extent_offset + old->offset +
+ old->len || extent_offset + num_bytes <=
+ old->extent_offset + old->offset)
+ continue;
+
+ break;
+ }
+
+ backref = kmalloc(sizeof(*backref), GFP_NOFS);
+ if (!backref) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ backref->root_id = root_id;
+ backref->inum = inum;
+ backref->file_pos = offset + extent_offset;
+ backref->num_bytes = num_bytes;
+ backref->extent_offset = extent_offset;
+ backref->generation = btrfs_file_extent_generation(leaf, extent);
+ backref->old = old;
+ backref_insert(&new->root, backref);
+ old->count++;
+out:
+ btrfs_release_path(path);
+ WARN_ON(ret);
+ return ret;
+}
+
+static noinline bool record_extent_backrefs(struct btrfs_path *path,
+ struct new_sa_defrag_extent *new)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(new->inode)->root->fs_info;
+ struct old_sa_defrag_extent *old, *tmp;
+ int ret;
+
+ new->path = path;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+ path, record_one_backref,
+ old);
+ BUG_ON(ret < 0 && ret != -ENOENT);
+
+ /* no backref to be processed for this extent */
+ if (!old->count) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ }
+
+ if (list_empty(&new->head))
+ return false;
+
+ return true;
+}
+
+static int relink_is_mergable(struct extent_buffer *leaf,
+ struct btrfs_file_extent_item *fi,
+ u64 disk_bytenr)
+{
+ if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+ return 0;
+
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+ return 0;
+
+ if (btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Note the backref might has changed, and in this case we just return 0.
+ */
+static noinline int relink_extent_backref(struct btrfs_path *path,
+ struct sa_defrag_extent_backref *prev,
+ struct sa_defrag_extent_backref *backref)
+{
+ struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *item;
+ struct btrfs_ordered_extent *ordered;
+ struct btrfs_trans_handle *trans;
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ struct extent_buffer *leaf;
+ struct old_sa_defrag_extent *old = backref->old;
+ struct new_sa_defrag_extent *new = old->new;
+ struct inode *src_inode = new->inode;
+ struct inode *inode;
+ struct extent_state *cached = NULL;
+ int ret = 0;
+ u64 start;
+ u64 len;
+ u64 lock_start;
+ u64 lock_end;
+ bool merge = false;
+ int index;
+
+ if (prev && prev->root_id == backref->root_id &&
+ prev->inum == backref->inum &&
+ prev->file_pos + prev->num_bytes == backref->file_pos)
+ merge = true;
+
+ /* step 1: get root */
+ key.objectid = backref->root_id;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+
+ fs_info = BTRFS_I(src_inode)->root->fs_info;
+ index = srcu_read_lock(&fs_info->subvol_srcu);
+
+ root = btrfs_read_fs_root_no_name(fs_info, &key);
+ if (IS_ERR(root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ if (PTR_ERR(root) == -ENOENT)
+ return 0;
+ return PTR_ERR(root);
+ }
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ /* parse ENOENT to 0 */
+ return 0;
+ }
+
+ /* step 2: get inode */
+ key.objectid = backref->inum;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ return 0;
+ }
+
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+
+ /* step 3: relink backref */
+ lock_start = backref->file_pos;
+ lock_end = backref->file_pos + backref->num_bytes - 1;
+ lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+ 0, &cached);
+
+ ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
+ if (ordered) {
+ btrfs_put_ordered_extent(ordered);
+ goto out_unlock;
+ }
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out_unlock;
+ }
+
+ key.objectid = backref->inum;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = backref->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+ goto out_free_path;
+ } else if (ret > 0) {
+ ret = 0;
+ goto out_free_path;
+ }
+
+ extent = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+
+ if (btrfs_file_extent_generation(path->nodes[0], extent) !=
+ backref->generation)
+ goto out_free_path;
+
+ btrfs_release_path(path);
+
+ start = backref->file_pos;
+ if (backref->extent_offset < old->extent_offset + old->offset)
+ start += old->extent_offset + old->offset -
+ backref->extent_offset;
+
+ len = min(backref->extent_offset + backref->num_bytes,
+ old->extent_offset + old->offset + old->len);
+ len -= max(backref->extent_offset, old->extent_offset + old->offset);
+
+ ret = btrfs_drop_extents(trans, root, inode, start,
+ start + len, 1);
+ if (ret)
+ goto out_free_path;
+again:
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = start;
+
+ path->leave_spinning = 1;
+ if (merge) {
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len;
+ struct btrfs_key found_key;
+
+ ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ if (ret < 0)
+ goto out_free_path;
+
+ path->slots[0]--;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+
+ if (relink_is_mergable(leaf, fi, new->bytenr) &&
+ extent_len + found_key.offset == start) {
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_len + len);
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+
+ ret = 1;
+ goto out_free_path;
+ } else {
+ merge = false;
+ btrfs_release_path(path);
+ goto again;
+ }
+ }
+
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ sizeof(*extent));
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ leaf = path->nodes[0];
+ item = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_disk_bytenr(leaf, item, new->bytenr);
+ btrfs_set_file_extent_disk_num_bytes(leaf, item, new->disk_len);
+ btrfs_set_file_extent_offset(leaf, item, start - new->file_pos);
+ btrfs_set_file_extent_num_bytes(leaf, item, len);
+ btrfs_set_file_extent_ram_bytes(leaf, item, new->len);
+ btrfs_set_file_extent_generation(leaf, item, trans->transid);
+ btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_compression(leaf, item, new->compress_type);
+ btrfs_set_file_extent_encryption(leaf, item, 0);
+ btrfs_set_file_extent_other_encoding(leaf, item, 0);
+
+ btrfs_mark_buffer_dirty(leaf);
+ inode_add_bytes(inode, len);
+ btrfs_release_path(path);
+
+ ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
+ new->disk_len, 0,
+ backref->root_id, backref->inum,
+ new->file_pos, 0); /* start - extent_offset */
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_free_path;
+ }
+
+ ret = 1;
+out_free_path:
+ btrfs_release_path(path);
+ path->leave_spinning = 0;
+ btrfs_end_transaction(trans, root);
+out_unlock:
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
+ &cached, GFP_NOFS);
+ iput(inode);
+ return ret;
+}
+
+static void relink_file_extents(struct new_sa_defrag_extent *new)
+{
+ struct btrfs_path *path;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct sa_defrag_extent_backref *backref;
+ struct sa_defrag_extent_backref *prev = NULL;
+ struct inode *inode;
+ struct btrfs_root *root;
+ struct rb_node *node;
+ int ret;
+
+ inode = new->inode;
+ root = BTRFS_I(inode)->root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return;
+
+ if (!record_extent_backrefs(path, new)) {
+ btrfs_free_path(path);
+ goto out;
+ }
+ btrfs_release_path(path);
+
+ while (1) {
+ node = rb_first(&new->root);
+ if (!node)
+ break;
+ rb_erase(node, &new->root);
+
+ backref = rb_entry(node, struct sa_defrag_extent_backref, node);
+
+ ret = relink_extent_backref(path, prev, backref);
+ WARN_ON(ret < 0);
+
+ kfree(prev);
+
+ if (ret == 1)
+ prev = backref;
+ else
+ prev = NULL;
+ cond_resched();
+ }
+ kfree(prev);
+
+ btrfs_free_path(path);
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out:
+ atomic_dec(&root->fs_info->defrag_running);
+ wake_up(&root->fs_info->transaction_wait);
+
+ kfree(new);
+}
+
+static struct new_sa_defrag_extent *
+record_old_file_extents(struct inode *inode,
+ struct btrfs_ordered_extent *ordered)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+ struct old_sa_defrag_extent *old, *tmp;
+ struct new_sa_defrag_extent *new;
+ int ret;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return NULL;
+
+ new->inode = inode;
+ new->file_pos = ordered->file_offset;
+ new->len = ordered->len;
+ new->bytenr = ordered->start;
+ new->disk_len = ordered->disk_len;
+ new->compress_type = ordered->compress_type;
+ new->root = RB_ROOT;
+ INIT_LIST_HEAD(&new->head);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ goto out_kfree;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = new->file_pos;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out_free_path;
+ if (ret > 0 && path->slots[0] > 0)
+ path->slots[0]--;
+
+ /* find out all the old extents for the file range */
+ while (1) {
+ struct btrfs_file_extent_item *extent;
+ struct extent_buffer *l;
+ int slot;
+ u64 num_bytes;
+ u64 offset;
+ u64 end;
+ u64 disk_bytenr;
+ u64 extent_offset;
+
+ l = path->nodes[0];
+ slot = path->slots[0];
+
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ goto out_free_list;
+ else if (ret > 0)
+ break;
+ continue;
+ }
+
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid != btrfs_ino(inode))
+ break;
+ if (key.type != BTRFS_EXTENT_DATA_KEY)
+ break;
+ if (key.offset >= new->file_pos + new->len)
+ break;
+
+ extent = btrfs_item_ptr(l, slot, struct btrfs_file_extent_item);
+
+ num_bytes = btrfs_file_extent_num_bytes(l, extent);
+ if (key.offset + num_bytes < new->file_pos)
+ goto next;
+
+ disk_bytenr = btrfs_file_extent_disk_bytenr(l, extent);
+ if (!disk_bytenr)
+ goto next;
+
+ extent_offset = btrfs_file_extent_offset(l, extent);
+
+ old = kmalloc(sizeof(*old), GFP_NOFS);
+ if (!old)
+ goto out_free_list;
+
+ offset = max(new->file_pos, key.offset);
+ end = min(new->file_pos + new->len, key.offset + num_bytes);
+
+ old->bytenr = disk_bytenr;
+ old->extent_offset = extent_offset;
+ old->offset = offset - key.offset;
+ old->len = end - offset;
+ old->new = new;
+ old->count = 0;
+ list_add_tail(&old->list, &new->head);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+
+ btrfs_free_path(path);
+ atomic_inc(&root->fs_info->defrag_running);
+
+ return new;
+
+out_free_list:
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+out_free_path:
+ btrfs_free_path(path);
+out_kfree:
+ kfree(new);
+ return NULL;
+}
+
/*
* helper function for btrfs_finish_ordered_io, this
* just reads in some of the csum leaves to prime them into ram
@@ -1854,6 +2585,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
struct btrfs_trans_handle *trans = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
+ struct new_sa_defrag_extent *new = NULL;
int compress_type = 0;
int ret;
bool nolock;
@@ -1867,22 +2599,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root);
+ else
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
}
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ ret = btrfs_update_inode_fallback(trans, root, inode);
+ if (ret) /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
goto out;
}
@@ -1890,6 +2620,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
ordered_extent->file_offset + ordered_extent->len - 1,
0, &cached_state);
+ ret = test_range_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 1, cached_state);
+ if (ret) {
+ u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
+ if (last_snapshot >= BTRFS_I(inode)->generation)
+ /* the inode is shared */
+ new = record_old_file_extents(inode, ordered_extent);
+
+ clear_extent_bit(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ EXTENT_DEFRAG, 0, 0, &cached_state, GFP_NOFS);
+ }
+
if (nolock)
trans = btrfs_join_transaction_nolock(root);
else
@@ -1931,15 +2675,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) { /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
- }
- } else {
- btrfs_set_inode_last_trans(trans, inode);
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ ret = btrfs_update_inode_fallback(trans, root, inode);
+ if (ret) { /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_unlock;
}
ret = 0;
out_unlock:
@@ -1952,17 +2692,33 @@ out:
if (trans)
btrfs_end_transaction(trans, root);
- if (ret)
+ if (ret) {
clear_extent_uptodate(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, NULL, GFP_NOFS);
+ /*
+ * If the ordered extent had an IOERR or something else went
+ * wrong we need to return the space for this ordered extent
+ * back to the allocator.
+ */
+ if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+ !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
+ btrfs_free_reserved_extent(root, ordered_extent->start,
+ ordered_extent->disk_len);
+ }
+
+
/*
* This needs to be done to make sure anybody waiting knows we are done
* updating everything for this ordered extent.
*/
btrfs_remove_ordered_extent(inode, ordered_extent);
+ /* for snapshot-aware defrag */
+ if (new)
+ relink_file_extents(new);
+
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
@@ -2013,7 +2769,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror)
{
- size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
+ size_t offset = start - page_offset(page);
struct inode *inode = page->mapping->host;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
char *kaddr;
@@ -2118,11 +2874,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
}
}
-enum btrfs_orphan_cleanup_state {
- ORPHAN_CLEANUP_STARTED = 1,
- ORPHAN_CLEANUP_DONE = 2,
-};
-
/*
* This is called in transaction commit time. If there are no orphan
* files in the subvolume, it removes orphan item and frees block_rsv
@@ -2420,6 +3171,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*/
set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags);
+ atomic_inc(&root->orphan_inodes);
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
@@ -2429,7 +3181,21 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
+
+ /* 1 for the orphan item deletion. */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ ret = btrfs_orphan_add(trans, inode);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ goto out;
+
ret = btrfs_truncate(inode);
+ if (ret)
+ btrfs_orphan_del(NULL, inode);
} else {
nr_unlink++;
}
@@ -2648,34 +3414,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
- btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
- btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
- btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
- btrfs_set_inode_mode(leaf, item, inode->i_mode);
- btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
+ struct btrfs_map_token token;
+
+ btrfs_init_map_token(&token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
- inode->i_atime.tv_nsec);
+ btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
+ btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
+ btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
+ &token);
+ btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
+ btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
- inode->i_mtime.tv_nsec);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
+ inode->i_atime.tv_nsec, &token);
- btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_sec);
- btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
- inode->i_ctime.tv_nsec);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
+ inode->i_mtime.tv_nsec, &token);
- btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode));
- btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
- btrfs_set_inode_sequence(leaf, item, inode->i_version);
- btrfs_set_inode_transid(leaf, item, trans->transid);
- btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
- btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
- btrfs_set_inode_block_group(leaf, item, 0);
+ btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_sec, &token);
+ btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
+ inode->i_ctime.tv_nsec, &token);
+
+ btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
+ &token);
+ btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
+ &token);
+ btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
+ btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
+ btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
+ btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
+ btrfs_set_token_inode_block_group(leaf, item, 0, &token);
}
/*
@@ -3074,7 +3847,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct inode *inode = dentry->d_inode;
int ret;
- unsigned long nr = 0;
trans = __unlink_start_trans(dir, dentry);
if (IS_ERR(trans))
@@ -3094,9 +3866,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
out:
- nr = trans->blocks_used;
__unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return ret;
}
@@ -3186,7 +3957,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
- unsigned long nr = 0;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -3215,9 +3985,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (!err)
btrfs_i_size_write(inode, 0);
out:
- nr = trans->blocks_used;
__unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -3247,7 +4016,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
- u64 mask = root->sectorsize - 1;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
@@ -3271,7 +4039,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* extent just the way it is.
*/
if (root->ref_cows || root == root->fs_info->tree_root)
- btrfs_drop_extent_cache(inode, (new_size + mask) & (~mask), (u64)-1, 0);
+ btrfs_drop_extent_cache(inode, ALIGN(new_size,
+ root->sectorsize), (u64)-1, 0);
/*
* This function is also used to drop the items in the log tree before
@@ -3350,10 +4119,9 @@ search_again:
if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
- extent_num_bytes = new_size -
- found_key.offset + root->sectorsize - 1;
- extent_num_bytes = extent_num_bytes &
- ~((u64)root->sectorsize - 1);
+ extent_num_bytes = ALIGN(new_size -
+ found_key.offset,
+ root->sectorsize);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_num_bytes);
num_dec = (orig_num_bytes -
@@ -3497,11 +4265,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
if (ret)
goto out;
- ret = -ENOMEM;
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+ ret = -ENOMEM;
goto out;
}
@@ -3550,7 +4318,6 @@ again:
goto out_unlock;
}
- ret = 0;
if (offset != PAGE_CACHE_SIZE) {
if (!len)
len = PAGE_CACHE_SIZE - offset;
@@ -3590,9 +4357,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
struct extent_map *em = NULL;
struct extent_state *cached_state = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- u64 mask = root->sectorsize - 1;
- u64 hole_start = (oldsize + mask) & ~mask;
- u64 block_end = (size + mask) & ~mask;
+ u64 hole_start = ALIGN(oldsize, root->sectorsize);
+ u64 block_end = ALIGN(size, root->sectorsize);
u64 last_byte;
u64 cur_offset;
u64 hole_size;
@@ -3621,10 +4387,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
block_end - cur_offset, 0);
if (IS_ERR(em)) {
err = PTR_ERR(em);
+ em = NULL;
break;
}
last_byte = min(extent_map_end(em), block_end);
- last_byte = (last_byte + mask) & ~mask;
+ last_byte = ALIGN(last_byte , root->sectorsize);
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
struct extent_map *hole_em;
hole_size = last_byte - cur_offset;
@@ -3668,6 +4435,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
+ hole_em->orig_block_len = 0;
hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
@@ -3703,16 +4471,27 @@ next:
return err;
}
-static int btrfs_setsize(struct inode *inode, loff_t newsize)
+static int btrfs_setsize(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
+ loff_t newsize = attr->ia_size;
+ int mask = attr->ia_valid;
int ret;
if (newsize == oldsize)
return 0;
+ /*
+ * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
+ * special case where we need to update the times despite not having
+ * these flags set. For all other operations the VFS set these flags
+ * explicitly if it wants a timestamp update.
+ */
+ if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
+ inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+
if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3738,9 +4517,40 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
&BTRFS_I(inode)->runtime_flags);
+ /*
+ * 1 for the orphan item we're going to add
+ * 1 for the orphan item deletion.
+ */
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ /*
+ * We need to do this in case we fail at _any_ point during the
+ * actual truncate. Once we do the truncate_setsize we could
+ * invalidate pages which forces any outstanding ordered io to
+ * be instantly completed which will give us extents that need
+ * to be truncated. If we fail to get an orphan inode down we
+ * could have left over extents that were never meant to live,
+ * so we need to garuntee from this point on that everything
+ * will be consistent.
+ */
+ ret = btrfs_orphan_add(trans, inode);
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ return ret;
+
/* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize);
+
+ /* Disable nonlocked read DIO to avoid the end less truncate */
+ btrfs_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+ btrfs_inode_resume_unlocked_dio(inode);
+
ret = btrfs_truncate(inode);
+ if (ret && inode->i_nlink)
+ btrfs_orphan_del(NULL, inode);
}
return ret;
@@ -3760,7 +4570,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- err = btrfs_setsize(inode, attr->ia_size);
+ err = btrfs_setsize(inode, attr);
if (err)
return err;
}
@@ -3783,7 +4593,6 @@ void btrfs_evict_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- unsigned long nr;
int ret;
trace_btrfs_inode_evict(inode);
@@ -3811,6 +4620,12 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
+ ret = btrfs_commit_inode_delayed_inode(inode);
+ if (ret) {
+ btrfs_orphan_del(NULL, inode);
+ goto no_delete;
+ }
+
rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
if (!rsv) {
btrfs_orphan_del(NULL, inode);
@@ -3829,7 +4644,8 @@ void btrfs_evict_inode(struct inode *inode)
* inode item when doing the truncate.
*/
while (1) {
- ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
+ ret = btrfs_block_rsv_refill(root, rsv, min_size,
+ BTRFS_RESERVE_FLUSH_LIMIT);
/*
* Try and steal from the global reserve since we will
@@ -3847,7 +4663,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
- trans = btrfs_start_transaction_noflush(root, 1);
+ trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
@@ -3861,13 +4677,9 @@ void btrfs_evict_inode(struct inode *inode)
break;
trans->block_rsv = &root->fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- BUG_ON(ret);
-
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
trans = NULL;
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
}
btrfs_free_block_rsv(root, rsv);
@@ -3883,9 +4695,8 @@ void btrfs_evict_inode(struct inode *inode)
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(inode));
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
no_delete:
clear_inode(inode);
return;
@@ -4219,16 +5030,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- if (unlikely(d_need_lookup(dentry))) {
- memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
- kfree(dentry->d_fsdata);
- dentry->d_fsdata = NULL;
- /* This thing is hashed, drop it for now */
- d_drop(dentry);
- } else {
- ret = btrfs_inode_by_name(dir, dentry, &location);
- }
-
+ ret = btrfs_inode_by_name(dir, dentry, &location);
if (ret < 0)
return ERR_PTR(ret);
@@ -4298,11 +5100,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct dentry *ret;
ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
- if (unlikely(d_need_lookup(dentry))) {
- spin_lock(&dentry->d_lock);
- dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
- spin_unlock(&dentry->d_lock);
- }
return ret;
}
@@ -4313,7 +5110,7 @@ unsigned char btrfs_filetype_table[] = {
static int btrfs_real_readdir(struct file *filp, void *dirent,
filldir_t filldir)
{
- struct inode *inode = filp->f_dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_item *item;
struct btrfs_dir_item *di;
@@ -4775,9 +5572,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW) ||
- (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
- BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+ if (btrfs_test_opt(root, NODATACOW))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_NODATASUM;
}
insert_inode_hash(inode);
@@ -4842,7 +5639,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, root, name, name_len,
parent_inode, &key,
btrfs_inode_type(inode), index);
- if (ret == -EEXIST)
+ if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -4897,7 +5694,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
int err;
int drop_inode = 0;
u64 objectid;
- unsigned long nr = 0;
u64 index = 0;
if (!new_valid_dev(rdev))
@@ -4947,9 +5743,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
}
out_unlock:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
@@ -4963,9 +5758,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
- int drop_inode = 0;
+ int drop_inode_on_err = 0;
int err;
- unsigned long nr = 0;
u64 objectid;
u64 index = 0;
@@ -4989,12 +5783,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
goto out_unlock;
}
+ drop_inode_on_err = 1;
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
+ if (err)
+ goto out_unlock;
+
+ err = btrfs_update_inode(trans, root, inode);
+ if (err)
goto out_unlock;
- }
/*
* If the active LSM wants to access the inode during
@@ -5007,21 +5804,20 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate(dentry, inode);
- }
+ goto out_unlock;
+
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ d_instantiate(dentry, inode);
+
out_unlock:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- if (drop_inode) {
+ if (err && drop_inode_on_err) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5032,7 +5828,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = old_dentry->d_inode;
u64 index;
- unsigned long nr = 0;
int err;
int drop_inode = 0;
@@ -5062,6 +5857,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
+ set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -5076,14 +5872,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_log_new_name(trans, inode, NULL, parent);
}
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5096,7 +5891,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int drop_on_err = 0;
u64 objectid = 0;
u64 index = 0;
- unsigned long nr = 1;
/*
* 2 items for inode and ref
@@ -5142,11 +5936,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
drop_on_err = 0;
out_fail:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
if (drop_on_err)
iput(inode);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5317,8 +6110,7 @@ again:
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_inline_len(leaf, item);
- extent_end = (extent_start + size + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
+ extent_end = ALIGN(extent_start + size, root->sectorsize);
}
if (start >= extent_end) {
@@ -5340,6 +6132,7 @@ again:
if (start + len <= found_key.offset)
goto not_found;
em->start = start;
+ em->orig_start = start;
em->len = found_key.offset - start;
goto not_found_em;
}
@@ -5350,6 +6143,8 @@ again:
em->len = extent_end - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, item);
+ em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+ item);
bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
@@ -5359,8 +6154,7 @@ again:
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
em->block_start = bytenr;
- em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
- item);
+ em->block_len = em->orig_block_len;
} else {
bytenr += btrfs_file_extent_offset(leaf, item);
em->block_start = bytenr;
@@ -5388,9 +6182,9 @@ again:
copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
size - extent_offset);
em->start = extent_start + extent_offset;
- em->len = (copy_size + root->sectorsize - 1) &
- ~((u64)root->sectorsize - 1);
- em->orig_start = EXTENT_MAP_INLINE;
+ em->len = ALIGN(copy_size, root->sectorsize);
+ em->orig_block_len = em->len;
+ em->orig_start = em->start;
if (compress_type) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
@@ -5439,11 +6233,11 @@ again:
extent_map_end(em) - 1, NULL, GFP_NOFS);
goto insert;
} else {
- printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
- WARN_ON(1);
+ WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
}
not_found:
em->start = start;
+ em->orig_start = start;
em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
@@ -5539,10 +6333,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
return em;
if (em) {
/*
- * if our em maps to a hole, there might
- * actually be delalloc bytes behind it
+ * if our em maps to
+ * - a hole or
+ * - a pre-alloc extent,
+ * there might actually be delalloc bytes behind it.
*/
- if (em->block_start != EXTENT_MAP_HOLE)
+ if (em->block_start != EXTENT_MAP_HOLE &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
return em;
else
hole_em = em;
@@ -5624,6 +6421,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
*/
em->block_start = hole_em->block_start;
em->block_len = hole_len;
+ if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
+ set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
} else {
em->start = range_start;
em->len = found;
@@ -5645,38 +6444,19 @@ out:
}
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
- struct extent_map *em,
u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
int ret;
- bool insert = false;
-
- /*
- * Ok if the extent map we looked up is a hole and is for the exact
- * range we want, there is no reason to allocate a new one, however if
- * it is not right then we need to free this one and drop the cache for
- * our range.
- */
- if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
- em->len != len) {
- free_extent_map(em);
- em = NULL;
- insert = true;
- btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
- }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return ERR_CAST(trans);
- if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
- btrfs_add_inode_defrag(trans, inode);
-
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5687,37 +6467,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
goto out;
}
- if (!em) {
- em = alloc_extent_map();
- if (!em) {
- em = ERR_PTR(-ENOMEM);
- goto out;
- }
- }
-
- em->start = start;
- em->orig_start = em->start;
- em->len = ins.offset;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
-
- /*
- * We need to do this because if we're using the original em we searched
- * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
- */
- em->flags = 0;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- while (insert) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST)
- break;
- btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
- }
+ em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+ ins.offset, ins.offset, 0);
+ if (IS_ERR(em))
+ goto out;
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
@@ -5894,7 +6647,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
- int type)
+ u64 orig_block_len, int type)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
@@ -5908,19 +6661,26 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->start = start;
em->orig_start = orig_start;
+ em->mod_start = start;
+ em->mod_len = len;
em->len = len;
em->block_len = block_len;
em->block_start = block_start;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->orig_block_len = orig_block_len;
+ em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
if (type == BTRFS_ORDERED_PREALLOC)
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ set_bit(EXTENT_FLAG_FILLING, &em->flags);
do {
btrfs_drop_extent_cache(inode, em->start,
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
@@ -5944,16 +6704,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
u64 len = bh_result->b_size;
struct btrfs_trans_handle *trans;
int unlock_bits = EXTENT_LOCKED;
- int ret;
+ int ret = 0;
- if (create) {
- ret = btrfs_delalloc_reserve_space(inode, len);
- if (ret)
- return ret;
+ if (create)
unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
- } else {
+ else
len = min_t(u64, len, root->sectorsize);
- }
lockstart = start;
lockend = start + len - 1;
@@ -5965,14 +6721,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
return -ENOTBLK;
- if (create) {
- ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, EXTENT_DELALLOC, NULL,
- &cached_state, GFP_NOFS);
- if (ret)
- goto unlock_err;
- }
-
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -6004,7 +6752,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (!create && (em->block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
free_extent_map(em);
- ret = 0;
goto unlock_err;
}
@@ -6047,13 +6794,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
- u64 orig_start = em->start;
+ u64 orig_start = em->orig_start;
+ u64 orig_block_len = em->orig_block_len;
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
em = create_pinned_em(inode, start, len,
orig_start,
- block_start, len, type);
+ block_start, len,
+ orig_block_len, type);
if (IS_ERR(em)) {
btrfs_end_transaction(trans, root);
goto unlock_err;
@@ -6077,7 +6826,8 @@ must_cow:
* it above
*/
len = bh_result->b_size;
- em = btrfs_new_extent_direct(inode, em, start, len);
+ free_extent_map(em);
+ em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
@@ -6099,6 +6849,15 @@ unlock:
*/
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
+
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+
+ ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockstart + len - 1, EXTENT_DELALLOC, NULL,
+ &cached_state, GFP_NOFS);
+ BUG_ON(ret);
}
/*
@@ -6107,24 +6866,9 @@ unlock:
* aren't using if there is any left over space.
*/
if (lockstart < lockend) {
- if (create && len < lockend - lockstart) {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockstart + len - 1,
- unlock_bits | EXTENT_DEFRAG, 1, 0,
- &cached_state, GFP_NOFS);
- /*
- * Beside unlock, we also need to cleanup reserved space
- * for the left range by attaching EXTENT_DO_ACCOUNTING.
- */
- clear_extent_bit(&BTRFS_I(inode)->io_tree,
- lockstart + len, lockend,
- unlock_bits | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
- } else {
- clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
- lockend, unlock_bits, 1, 0,
- &cached_state, GFP_NOFS);
- }
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, unlock_bits, 1, 0,
+ &cached_state, GFP_NOFS);
} else {
free_extent_state(cached_state);
}
@@ -6134,9 +6878,6 @@ unlock:
return 0;
unlock_err:
- if (create)
- unlock_bits |= EXTENT_DO_ACCOUNTING;
-
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
return ret;
@@ -6318,6 +7059,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
+ if (async_submit)
+ async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
bio_get(bio);
if (!write) {
@@ -6362,7 +7106,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
{
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
struct bio_vec *bvec = orig_bio->bi_io_vec;
@@ -6375,19 +7118,24 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int async_submit = 0;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(orig_bio);
return -EIO;
}
-
if (map_length >= orig_bio->bi_size) {
bio = orig_bio;
goto submit;
}
- async_submit = 1;
+ /* async crcs make it difficult to collect full stripe writes. */
+ if (btrfs_get_alloc_profile(root, 1) &
+ (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))
+ async_submit = 0;
+ else
+ async_submit = 1;
+
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
return -ENOMEM;
@@ -6429,7 +7177,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_end_io = btrfs_end_dio_bio;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ ret = btrfs_map_block(root->fs_info, rw,
+ start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(bio);
@@ -6571,20 +7320,73 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ size_t count = 0;
+ int flags = 0;
+ bool wakeup = true;
+ bool relock = false;
+ ssize_t ret;
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
offset, nr_segs))
return 0;
- return __blockdev_direct_IO(rw, iocb, inode,
- BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
- iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
- btrfs_submit_direct, 0);
+ atomic_inc(&inode->i_dio_count);
+ smp_mb__after_atomic_inc();
+
+ if (rw & WRITE) {
+ count = iov_length(iov, nr_segs);
+ /*
+ * If the write DIO is beyond the EOF, we need update
+ * the isize, but it is protected by i_mutex. So we can
+ * not unlock the i_mutex at this case.
+ */
+ if (offset + count <= inode->i_size) {
+ mutex_unlock(&inode->i_mutex);
+ relock = true;
+ }
+ ret = btrfs_delalloc_reserve_space(inode, count);
+ if (ret)
+ goto out;
+ } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+ &BTRFS_I(inode)->runtime_flags))) {
+ inode_dio_done(inode);
+ flags = DIO_LOCKING | DIO_SKIP_HOLES;
+ wakeup = false;
+ }
+
+ ret = __blockdev_direct_IO(rw, iocb, inode,
+ BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
+ iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+ btrfs_submit_direct, flags);
+ if (rw & WRITE) {
+ if (ret < 0 && ret != -EIOCBQUEUED)
+ btrfs_delalloc_release_space(inode, count);
+ else if (ret >= 0 && (size_t)ret < count)
+ btrfs_delalloc_release_space(inode,
+ count - (size_t)ret);
+ else
+ btrfs_delalloc_release_metadata(inode, 0);
+ }
+out:
+ if (wakeup)
+ inode_dio_done(inode);
+ if (relock)
+ mutex_lock(&inode->i_mutex);
+
+ return ret;
}
+#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
+
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
+ int ret;
+
+ ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+ if (ret)
+ return ret;
+
return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
@@ -6675,8 +7477,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
return;
}
lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode,
- page_offset(page));
+ ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
if (ordered) {
/*
* IO on this page will never be started, so we need
@@ -6731,7 +7532,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct page *page = vmf->page;
- struct inode *inode = fdentry(vma->vm_file)->d_inode;
+ struct inode *inode = file_inode(vma->vm_file);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_ordered_extent *ordered;
@@ -6855,7 +7656,6 @@ static int btrfs_truncate(struct inode *inode)
int ret;
int err = 0;
struct btrfs_trans_handle *trans;
- unsigned long nr;
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
@@ -6910,11 +7710,9 @@ static int btrfs_truncate(struct inode *inode)
/*
* 1 for the truncate slack space
- * 1 for the orphan item we're going to add
- * 1 for the orphan item deletion
* 1 for updating the inode.
*/
- trans = btrfs_start_transaction(root, 4);
+ trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
@@ -6925,12 +7723,6 @@ static int btrfs_truncate(struct inode *inode)
min_size);
BUG_ON(ret);
- ret = btrfs_orphan_add(trans, inode);
- if (ret) {
- btrfs_end_transaction(trans, root);
- goto out;
- }
-
/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
@@ -6978,9 +7770,8 @@ static int btrfs_truncate(struct inode *inode)
break;
}
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
@@ -7000,12 +7791,6 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_orphan_del(trans, inode);
if (ret)
err = ret;
- } else if (ret && inode->i_nlink > 0) {
- /*
- * Failed to do the truncate, remove us from the in memory
- * orphan list.
- */
- ret = btrfs_orphan_del(NULL, inode);
}
if (trans) {
@@ -7014,9 +7799,8 @@ static int btrfs_truncate(struct inode *inode)
if (ret && !err)
err = ret;
- nr = trans->blocks_used;
ret = btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
}
out:
@@ -7093,6 +7877,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
ei->io_tree.track_uptodate = 1;
ei->io_failure_tree.track_uptodate = 1;
+ atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7172,8 +7957,9 @@ int btrfs_drop_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ /* the snap/subvol tree is on deleting */
if (btrfs_root_refs(&root->root_item) == 0 &&
- !btrfs_is_free_space_inode(inode))
+ root != root->fs_info->tree_root)
return 1;
else
return generic_drop_inode(inode);
@@ -7203,6 +7989,8 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_path_cachep);
if (btrfs_free_space_cachep)
kmem_cache_destroy(btrfs_free_space_cachep);
+ if (btrfs_delalloc_work_cachep)
+ kmem_cache_destroy(btrfs_delalloc_work_cachep);
}
int btrfs_init_cachep(void)
@@ -7237,6 +8025,13 @@ int btrfs_init_cachep(void)
if (!btrfs_free_space_cachep)
goto fail;
+ btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
+ sizeof(struct btrfs_delalloc_work), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
+ if (!btrfs_delalloc_work_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -7246,40 +8041,22 @@ fail:
static int btrfs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
{
+ u64 delalloc_bytes;
struct inode *inode = dentry->d_inode;
u32 blocksize = inode->i_sb->s_blocksize;
generic_fillattr(inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->blksize = PAGE_CACHE_SIZE;
+
+ spin_lock(&BTRFS_I(inode)->lock);
+ delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
+ spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
- ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
+ ALIGN(delalloc_bytes, blocksize)) >> 9;
return 0;
}
-/*
- * If a file is moved, it will inherit the cow and compression flags of the new
- * directory.
- */
-static void fixup_inode_flags(struct inode *dir, struct inode *inode)
-{
- struct btrfs_inode *b_dir = BTRFS_I(dir);
- struct btrfs_inode *b_inode = BTRFS_I(inode);
-
- if (b_dir->flags & BTRFS_INODE_NODATACOW)
- b_inode->flags |= BTRFS_INODE_NODATACOW;
- else
- b_inode->flags &= ~BTRFS_INODE_NODATACOW;
-
- if (b_dir->flags & BTRFS_INODE_COMPRESS) {
- b_inode->flags |= BTRFS_INODE_COMPRESS;
- b_inode->flags &= ~BTRFS_INODE_NOCOMPRESS;
- } else {
- b_inode->flags &= ~(BTRFS_INODE_COMPRESS |
- BTRFS_INODE_NOCOMPRESS);
- }
-}
-
static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -7308,6 +8085,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode) && new_inode &&
new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
+
+
+ /* check for collisions, even if the name isn't there */
+ ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+
+ if (ret) {
+ if (ret == -EEXIST) {
+ /* we shouldn't get
+ * eexist without a new_inode */
+ if (!new_inode) {
+ WARN_ON(1);
+ return ret;
+ }
+ } else {
+ /* maybe -EOVERFLOW */
+ return ret;
+ }
+ }
+ ret = 0;
+
/*
* we're using rename to replace one file with another.
* and the replacement file is large. Start IO on it now so
@@ -7423,8 +8222,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
- fixup_inode_flags(new_dir, old_inode);
-
ret = btrfs_add_link(trans, new_dir, old_inode,
new_dentry->d_name.name,
new_dentry->d_name.len, 0, index);
@@ -7447,39 +8244,106 @@ out_notrans:
return ret;
}
+static void btrfs_run_delalloc_work(struct btrfs_work *work)
+{
+ struct btrfs_delalloc_work *delalloc_work;
+
+ delalloc_work = container_of(work, struct btrfs_delalloc_work,
+ work);
+ if (delalloc_work->wait)
+ btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+ else
+ filemap_flush(delalloc_work->inode->i_mapping);
+
+ if (delalloc_work->delay_iput)
+ btrfs_add_delayed_iput(delalloc_work->inode);
+ else
+ iput(delalloc_work->inode);
+ complete(&delalloc_work->completion);
+}
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+ int wait, int delay_iput)
+{
+ struct btrfs_delalloc_work *work;
+
+ work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+ if (!work)
+ return NULL;
+
+ init_completion(&work->completion);
+ INIT_LIST_HEAD(&work->list);
+ work->inode = inode;
+ work->wait = wait;
+ work->delay_iput = delay_iput;
+ work->work.func = btrfs_run_delalloc_work;
+
+ return work;
+}
+
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
+{
+ wait_for_completion(&work->completion);
+ kmem_cache_free(btrfs_delalloc_work_cachep, work);
+}
+
/*
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
- struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
struct inode *inode;
+ struct btrfs_delalloc_work *work, *next;
+ struct list_head works;
+ struct list_head splice;
+ int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ INIT_LIST_HEAD(&works);
+ INIT_LIST_HEAD(&splice);
+
spin_lock(&root->fs_info->delalloc_lock);
- while (!list_empty(head)) {
- binode = list_entry(head->next, struct btrfs_inode,
+ list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+ while (!list_empty(&splice)) {
+ binode = list_entry(splice.next, struct btrfs_inode,
delalloc_inodes);
+
+ list_del_init(&binode->delalloc_inodes);
+
inode = igrab(&binode->vfs_inode);
- if (!inode)
- list_del_init(&binode->delalloc_inodes);
+ if (!inode) {
+ clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &binode->runtime_flags);
+ continue;
+ }
+
+ list_add_tail(&binode->delalloc_inodes,
+ &root->fs_info->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
- if (inode) {
- filemap_flush(inode->i_mapping);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+
+ work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ if (unlikely(!work)) {
+ ret = -ENOMEM;
+ goto out;
}
+ list_add_tail(&work->list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &work->work);
+
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
}
spin_unlock(&root->fs_info->delalloc_lock);
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
+
/* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
* ordered extents get created before we return
@@ -7493,6 +8357,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
}
atomic_dec(&root->fs_info->async_submit_draining);
return 0;
+out:
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
+
+ if (!list_empty_careful(&splice)) {
+ spin_lock(&root->fs_info->delalloc_lock);
+ list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
+ spin_unlock(&root->fs_info->delalloc_lock);
+ }
+ return ret;
}
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -7512,7 +8388,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct extent_buffer *leaf;
- unsigned long nr = 0;
name_len = strlen(symname) + 1;
if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
@@ -7610,13 +8485,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
out_unlock:
if (!err)
d_instantiate(dentry, inode);
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -7631,6 +8505,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
struct btrfs_key ins;
u64 cur_offset = start;
u64 i_size;
+ u64 cur_bytes;
int ret = 0;
bool own_trans = true;
@@ -7645,8 +8520,10 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
}
}
- ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
- 0, *alloc_hint, &ins, 1);
+ cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
+ cur_bytes = max(cur_bytes, min_size);
+ ret = btrfs_reserve_extent(trans, root, cur_bytes,
+ min_size, 0, *alloc_hint, &ins, 1);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -7679,6 +8556,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->len = ins.offset;
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
em->generation = trans->transid;