summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1361
1 files changed, 922 insertions, 439 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f1a77449d032..016c403bfe7e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -58,9 +58,10 @@
#include "inode-map.h"
#include "backref.h"
#include "hash.h"
+#include "props.h"
struct btrfs_iget_args {
- u64 ino;
+ struct btrfs_key *location;
struct btrfs_root *root;
};
@@ -124,14 +125,13 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
* the btree. The caller should have done a btrfs_drop_extents so that
* no overlapping inline items exist in the btree
*/
-static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+static int insert_inline_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path, int extent_inserted,
struct btrfs_root *root, struct inode *inode,
u64 start, size_t size, size_t compressed_size,
int compress_type,
struct page **compressed_pages)
{
- struct btrfs_key key;
- struct btrfs_path *path;
struct extent_buffer *leaf;
struct page *page = NULL;
char *kaddr;
@@ -140,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
int err = 0;
int ret;
size_t cur_size = size;
- size_t datasize;
unsigned long offset;
if (compressed_size && compressed_pages)
cur_size = compressed_size;
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ inode_add_bytes(inode, size);
- path->leave_spinning = 1;
+ if (!extent_inserted) {
+ struct btrfs_key key;
+ size_t datasize;
- key.objectid = btrfs_ino(inode);
- key.offset = start;
- btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- datasize = btrfs_file_extent_calc_inline_size(cur_size);
+ key.objectid = btrfs_ino(inode);
+ key.offset = start;
+ btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
- inode_add_bytes(inode, size);
- ret = btrfs_insert_empty_item(trans, root, path, &key,
- datasize);
- if (ret) {
- err = ret;
- goto fail;
+ datasize = btrfs_file_extent_calc_inline_size(cur_size);
+ path->leave_spinning = 1;
+ ret = btrfs_insert_empty_item(trans, root, path, &key,
+ datasize);
+ if (ret) {
+ err = ret;
+ goto fail;
+ }
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
page_cache_release(page);
}
btrfs_mark_buffer_dirty(leaf);
- btrfs_free_path(path);
+ btrfs_release_path(path);
/*
* we're an inline extent, so nobody can
@@ -219,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
return ret;
fail:
- btrfs_free_path(path);
return err;
}
@@ -242,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
u64 aligned_end = ALIGN(end, root->sectorsize);
u64 data_len = inline_len;
int ret;
+ struct btrfs_path *path;
+ int extent_inserted = 0;
+ u32 extent_item_size;
if (compressed_size)
data_len = compressed_size;
@@ -256,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
return 1;
}
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
trans = btrfs_join_transaction(root);
- if (IS_ERR(trans))
+ if (IS_ERR(trans)) {
+ btrfs_free_path(path);
return PTR_ERR(trans);
+ }
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
+ if (compressed_size && compressed_pages)
+ extent_item_size = btrfs_file_extent_calc_inline_size(
+ compressed_size);
+ else
+ extent_item_size = btrfs_file_extent_calc_inline_size(
+ inline_len);
+
+ ret = __btrfs_drop_extents(trans, root, inode, path,
+ start, aligned_end, NULL,
+ 1, 1, extent_item_size, &extent_inserted);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto out;
@@ -269,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
if (isize > actual_end)
inline_len = min_t(u64, isize, actual_end);
- ret = insert_inline_extent(trans, root, inode, start,
+ ret = insert_inline_extent(trans, path, extent_inserted,
+ root, inode, start,
inline_len, compressed_size,
compress_type, compressed_pages);
if (ret && ret != -ENOSPC) {
@@ -284,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
btrfs_delalloc_release_metadata(inode, end + 1 - start);
btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
out:
+ btrfs_free_path(path);
btrfs_end_transaction(trans, root);
return ret;
}
@@ -375,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode,
(start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
btrfs_add_inode_defrag(NULL, inode);
+ /*
+ * skip compression for a small file range(<=blocksize) that
+ * isn't an inline extent, since it dosen't save disk space at all.
+ */
+ if ((end - start + 1) <= blocksize &&
+ (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+ goto cleanup_and_bail_uncompressed;
+
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
@@ -666,7 +693,7 @@ retry:
ret = btrfs_reserve_extent(root,
async_extent->compressed_size,
async_extent->compressed_size,
- 0, alloc_hint, &ins, 1);
+ 0, alloc_hint, &ins, 1, 1);
if (ret) {
int i;
@@ -682,6 +709,18 @@ retry:
unlock_extent(io_tree, async_extent->start,
async_extent->start +
async_extent->ram_size - 1);
+
+ /*
+ * we need to redirty the pages if we decide to
+ * fallback to uncompressed IO, otherwise we
+ * will not submit these pages down to lower
+ * layers.
+ */
+ extent_range_redirty_for_io(inode,
+ async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1);
+
goto retry;
}
goto out_free;
@@ -739,8 +778,12 @@ retry:
ins.offset,
BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
- if (ret)
+ if (ret) {
+ btrfs_drop_extent_cache(inode, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1, 0);
goto out_free_reserve;
+ }
/*
* clear dirty, set writeback and unlock the pages.
@@ -767,7 +810,7 @@ retry:
out:
return ret;
out_free_reserve:
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_free:
extent_clear_unlock_delalloc(inode, async_extent->start,
async_extent->start +
@@ -845,7 +888,8 @@ static noinline int cow_file_range(struct inode *inode,
if (btrfs_is_free_space_inode(inode)) {
WARN_ON_ONCE(1);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
num_bytes = ALIGN(end - start + 1, blocksize);
@@ -889,7 +933,7 @@ static noinline int cow_file_range(struct inode *inode,
cur_alloc_size = disk_num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size,
root->sectorsize, 0, alloc_hint,
- &ins, 1);
+ &ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -931,14 +975,14 @@ static noinline int cow_file_range(struct inode *inode,
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
ram_size, cur_alloc_size, 0);
if (ret)
- goto out_reserve;
+ goto out_drop_extent_cache;
if (root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
if (ret)
- goto out_reserve;
+ goto out_drop_extent_cache;
}
if (disk_num_bytes < cur_alloc_size)
@@ -966,8 +1010,10 @@ static noinline int cow_file_range(struct inode *inode,
out:
return ret;
+out_drop_extent_cache:
+ btrfs_drop_extent_cache(inode, start, start + ram_size - 1, 0);
out_reserve:
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
out_unlock:
extent_clear_unlock_delalloc(inode, start, end, locked_page,
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
@@ -1056,17 +1102,17 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow->end = cur_end;
INIT_LIST_HEAD(&async_cow->extents);
- async_cow->work.func = async_cow_start;
- async_cow->work.ordered_func = async_cow_submit;
- async_cow->work.ordered_free = async_cow_free;
- async_cow->work.flags = 0;
+ btrfs_init_work(&async_cow->work,
+ btrfs_delalloc_helper,
+ async_cow_start, async_cow_submit,
+ async_cow_free);
nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
PAGE_CACHE_SHIFT;
atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
- btrfs_queue_worker(&root->fs_info->delalloc_workers,
- &async_cow->work);
+ btrfs_queue_work(root->fs_info->delalloc_workers,
+ &async_cow->work);
if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {
wait_event(root->fs_info->async_submit_wait,
@@ -1253,6 +1299,15 @@ next_slot:
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;
/*
+ * if there are pending snapshots for this root,
+ * we fall into common COW way.
+ */
+ if (!nolock) {
+ err = btrfs_start_nocow_write(root);
+ if (!err)
+ goto out_check;
+ }
+ /*
* force cow if csum exists in the range.
* this ensure that csum for a given extent are
* either valid or do not exist.
@@ -1262,7 +1317,8 @@ next_slot:
nocow = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
extent_end = found_key.offset +
- btrfs_file_extent_inline_len(leaf, fi);
+ btrfs_file_extent_inline_len(leaf,
+ path->slots[0], fi);
extent_end = ALIGN(extent_end, root->sectorsize);
} else {
BUG_ON(1);
@@ -1270,6 +1326,8 @@ next_slot:
out_check:
if (extent_end <= start) {
path->slots[0]++;
+ if (!nolock && nocow)
+ btrfs_end_nocow_write(root);
goto next_slot;
}
if (!nocow) {
@@ -1287,8 +1345,11 @@ out_check:
ret = cow_file_range(inode, locked_page,
cow_start, found_key.offset - 1,
page_started, nr_written, 1);
- if (ret)
+ if (ret) {
+ if (!nolock && nocow)
+ btrfs_end_nocow_write(root);
goto error;
+ }
cow_start = (u64)-1;
}
@@ -1335,8 +1396,11 @@ out_check:
BTRFS_DATA_RELOC_TREE_OBJECTID) {
ret = btrfs_reloc_clone_csums(inode, cur_offset,
num_bytes);
- if (ret)
+ if (ret) {
+ if (!nolock && nocow)
+ btrfs_end_nocow_write(root);
goto error;
+ }
}
extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1344,6 +1408,8 @@ out_check:
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC, PAGE_UNLOCK |
PAGE_SET_PRIVATE2);
+ if (!nolock && nocow)
+ btrfs_end_nocow_write(root);
cur_offset = extent_end;
if (cur_offset > end)
break;
@@ -1577,7 +1643,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
unsigned long bio_flags)
{
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- u64 logical = (u64)bio->bi_sector << 9;
+ u64 logical = (u64)bio->bi_iter.bi_sector << 9;
u64 length = 0;
u64 map_length;
int ret;
@@ -1585,7 +1651,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,
if (bio_flags & EXTENT_BIO_COMPRESSED)
return 0;
- length = bio->bi_size;
+ length = bio->bi_iter.bi_size;
map_length = length;
ret = btrfs_map_block(root->fs_info, rw, logical,
&map_length, NULL, 0);
@@ -1823,9 +1889,10 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
SetPageChecked(page);
page_cache_get(page);
- fixup->work.func = btrfs_writepage_fixup_worker;
+ btrfs_init_work(&fixup->work, btrfs_fixup_helper,
+ btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
- btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
+ btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);
return -EBUSY;
}
@@ -1841,14 +1908,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key ins;
+ int extent_inserted = 0;
int ret;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- path->leave_spinning = 1;
-
/*
* we may be replacing one extent in the tree with another.
* The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1924,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = btrfs_drop_extents(trans, root, inode, file_pos,
- file_pos + num_bytes, 0);
+ ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
+ file_pos + num_bytes, NULL, 0,
+ 1, sizeof(*fi), &extent_inserted);
if (ret)
goto out;
- ins.objectid = btrfs_ino(inode);
- ins.offset = file_pos;
- ins.type = BTRFS_EXTENT_DATA_KEY;
- ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
- if (ret)
- goto out;
+ if (!extent_inserted) {
+ ins.objectid = btrfs_ino(inode);
+ ins.offset = file_pos;
+ ins.type = BTRFS_EXTENT_DATA_KEY;
+
+ path->leave_spinning = 1;
+ ret = btrfs_insert_empty_item(trans, root, path, &ins,
+ sizeof(*fi));
+ if (ret)
+ goto out;
+ }
leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
@@ -2214,6 +2286,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
return PTR_ERR(root);
}
+ if (btrfs_root_readonly(root)) {
+ srcu_read_unlock(&fs_info->subvol_srcu, index);
+ return 0;
+ }
+
/* step 2: get inode */
key.objectid = backref->inum;
key.type = BTRFS_INODE_ITEM_KEY;
@@ -2290,7 +2367,7 @@ again:
u64 extent_len;
struct btrfs_key found_key;
- ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out_free_path;
@@ -2543,12 +2620,21 @@ out_kfree:
return NULL;
}
-/*
- * helper function for btrfs_finish_ordered_io, this
- * just reads in some of the csum leaves to prime them into ram
- * before we start the transaction. It limits the amount of btree
- * reads required while inside the transaction.
- */
+static void btrfs_release_delalloc_bytes(struct btrfs_root *root,
+ u64 start, u64 len)
+{
+ struct btrfs_block_group_cache *cache;
+
+ cache = btrfs_lookup_block_group(root->fs_info, start);
+ ASSERT(cache);
+
+ spin_lock(&cache->lock);
+ cache->delalloc_bytes -= len;
+ spin_unlock(&cache->lock);
+
+ btrfs_put_block_group(cache);
+}
+
/* as ordered data IO finishes, this gets called so we can finish
* an ordered extent if the range of bytes in the file it covers are
* fully written.
@@ -2610,7 +2696,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -2628,6 +2714,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
trans = NULL;
goto out_unlock;
}
+
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -2647,6 +2734,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
logical_len, logical_len,
compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
+ if (!ret)
+ btrfs_release_delalloc_bytes(root,
+ ordered_extent->start,
+ ordered_extent->disk_len);
}
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered_extent->file_offset, ordered_extent->len,
@@ -2699,7 +2790,7 @@ out:
!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
btrfs_free_reserved_extent(root, ordered_extent->start,
- ordered_extent->disk_len);
+ ordered_extent->disk_len, 1);
}
@@ -2740,7 +2831,8 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ordered_extent *ordered_extent = NULL;
- struct btrfs_workers *workers;
+ struct btrfs_workqueue *wq;
+ btrfs_work_func_t func;
trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
@@ -2749,14 +2841,17 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
end - start + 1, uptodate))
return 0;
- ordered_extent->work.func = finish_ordered_fn;
- ordered_extent->work.flags = 0;
+ if (btrfs_is_free_space_inode(inode)) {
+ wq = root->fs_info->endio_freespace_worker;
+ func = btrfs_freespace_write_helper;
+ } else {
+ wq = root->fs_info->endio_write_workers;
+ func = btrfs_endio_write_helper;
+ }
- if (btrfs_is_free_space_inode(inode))
- workers = &root->fs_info->endio_freespace_worker;
- else
- workers = &root->fs_info->endio_write_workers;
- btrfs_queue_worker(workers, &ordered_extent->work);
+ btrfs_init_work(&ordered_extent->work, func, finish_ordered_fn, NULL,
+ NULL);
+ btrfs_queue_work(wq, &ordered_extent->work);
return 0;
}
@@ -2898,14 +2993,15 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
root->orphan_block_rsv = NULL;
spin_unlock(&root->orphan_lock);
- if (root->orphan_item_inserted &&
+ if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&
btrfs_root_refs(&root->root_item) > 0) {
ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
root->root_key.objectid);
if (ret)
btrfs_abort_transaction(trans, root, ret);
else
- root->orphan_item_inserted = 0;
+ clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+ &root->state);
}
if (block_rsv) {
@@ -3222,7 +3318,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
btrfs_block_rsv_release(root, root->orphan_block_rsv,
(u64)-1);
- if (root->orphan_block_rsv || root->orphan_item_inserted) {
+ if (root->orphan_block_rsv ||
+ test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {
trans = btrfs_join_transaction(root);
if (!IS_ERR(trans))
btrfs_end_transaction(trans, root);
@@ -3248,7 +3345,8 @@ out:
* slot is the slot the inode is in, objectid is the objectid of the inode
*/
static noinline int acls_after_inode_item(struct extent_buffer *leaf,
- int slot, u64 objectid)
+ int slot, u64 objectid,
+ int *first_xattr_slot)
{
u32 nritems = btrfs_header_nritems(leaf);
struct btrfs_key found_key;
@@ -3264,6 +3362,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
}
slot++;
+ *first_xattr_slot = -1;
while (slot < nritems) {
btrfs_item_key_to_cpu(leaf, &found_key, slot);
@@ -3273,6 +3372,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
/* we found an xattr, assume we've got an acl */
if (found_key.type == BTRFS_XATTR_ITEM_KEY) {
+ if (*first_xattr_slot == -1)
+ *first_xattr_slot = slot;
if (found_key.offset == xattr_access ||
found_key.offset == xattr_default)
return 1;
@@ -3301,6 +3402,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
* something larger than an xattr. We have to assume the inode
* has acls
*/
+ if (*first_xattr_slot == -1)
+ *first_xattr_slot = slot;
return 1;
}
@@ -3315,10 +3418,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
struct btrfs_timespec *tspec;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key location;
+ unsigned long ptr;
int maybe_acls;
u32 rdev;
int ret;
bool filled = false;
+ int first_xattr_slot;
ret = btrfs_fill_inode(inode, &rdev);
if (!ret)
@@ -3328,7 +3433,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
if (!path)
goto make_bad;
- path->leave_spinning = 1;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -3338,7 +3442,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
leaf = path->nodes[0];
if (filled)
- goto cache_acl;
+ goto cache_index;
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
@@ -3381,18 +3485,51 @@ static void btrfs_read_locked_inode(struct inode *inode)
BTRFS_I(inode)->index_cnt = (u64)-1;
BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
+
+cache_index:
+ path->slots[0]++;
+ if (inode->i_nlink != 1 ||
+ path->slots[0] >= btrfs_header_nritems(leaf))
+ goto cache_acl;
+
+ btrfs_item_key_to_cpu(leaf, &location, path->slots[0]);
+ if (location.objectid != btrfs_ino(inode))
+ goto cache_acl;
+
+ ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+ if (location.type == BTRFS_INODE_REF_KEY) {
+ struct btrfs_inode_ref *ref;
+
+ ref = (struct btrfs_inode_ref *)ptr;
+ BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref);
+ } else if (location.type == BTRFS_INODE_EXTREF_KEY) {
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)ptr;
+ BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf,
+ extref);
+ }
cache_acl:
/*
* try to precache a NULL acl entry for files that don't have
* any xattrs or acls
*/
maybe_acls = acls_after_inode_item(leaf, path->slots[0],
- btrfs_ino(inode));
+ btrfs_ino(inode), &first_xattr_slot);
+ if (first_xattr_slot != -1) {
+ path->slots[0] = first_xattr_slot;
+ ret = btrfs_load_inode_props(inode, path);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "error loading props for ino %llu (root %llu): %d",
+ btrfs_ino(inode),
+ root->root_key.objectid, ret);
+ }
+ btrfs_free_path(path);
+
if (!maybe_acls)
cache_no_acl(inode);
- btrfs_free_path(path);
-
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_mapping->a_ops = &btrfs_aops;
@@ -3496,7 +3633,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
goto failed;
}
- btrfs_unlock_up_safe(path, 1);
leaf = path->nodes[0];
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
@@ -3593,6 +3729,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
goto err;
btrfs_release_path(path);
+ /*
+ * If we don't have dir index, we have to get it by looking up
+ * the inode ref, since we get the inode ref, remove it directly,
+ * it is unnecessary to do delayed deletion.
+ *
+ * But if we have dir index, needn't search inode ref to get it.
+ * Since the inode ref is close to the inode item, it is better
+ * that we delay to delete it, and just do this deletion when
+ * we update the inode item.
+ */
+ if (BTRFS_I(inode)->dir_index) {
+ ret = btrfs_delayed_delete_inode_ref(inode);
+ if (!ret) {
+ index = BTRFS_I(inode)->dir_index;
+ goto skip_backref;
+ }
+ }
+
ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
dir_ino, &index);
if (ret) {
@@ -3602,7 +3756,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, root, ret);
goto err;
}
-
+skip_backref:
ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -3892,7 +4046,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
* not block aligned since we will be keeping the last block of the
* extent just the way it is.
*/
- if (root->ref_cows || root == root->fs_info->tree_root)
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, ALIGN(new_size,
root->sectorsize), (u64)-1, 0);
@@ -3948,7 +4103,7 @@ search_again:
btrfs_file_extent_num_bytes(leaf, fi);
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
item_end += btrfs_file_extent_inline_len(leaf,
- fi);
+ path->slots[0], fi);
}
item_end--;
}
@@ -3985,7 +4140,9 @@ search_again:
extent_num_bytes);
num_dec = (orig_num_bytes -
extent_num_bytes);
- if (root->ref_cows && extent_start != 0)
+ if (test_bit(BTRFS_ROOT_REF_COWS,
+ &root->state) &&
+ extent_start != 0)
inode_sub_bytes(inode, num_dec);
btrfs_mark_buffer_dirty(leaf);
} else {
@@ -3999,7 +4156,8 @@ search_again:
num_dec = btrfs_file_extent_num_bytes(leaf, fi);
if (extent_start != 0) {
found_extent = 1;
- if (root->ref_cows)
+ if (test_bit(BTRFS_ROOT_REF_COWS,
+ &root->state))
inode_sub_bytes(inode, num_dec);
}
}
@@ -4014,14 +4172,20 @@ search_again:
btrfs_file_extent_other_encoding(leaf, fi) == 0) {
u32 size = new_size - found_key.offset;
- if (root->ref_cows) {
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
inode_sub_bytes(inode, item_end + 1 -
new_size);
- }
+
+ /*
+ * update the ram bytes to properly reflect
+ * the new size of our item
+ */
+ btrfs_set_file_extent_ram_bytes(leaf, fi, size);
size =
btrfs_file_extent_calc_inline_size(size);
btrfs_truncate_item(root, path, size, 1);
- } else if (root->ref_cows) {
+ } else if (test_bit(BTRFS_ROOT_REF_COWS,
+ &root->state)) {
inode_sub_bytes(inode, item_end + 1 -
found_key.offset);
}
@@ -4043,8 +4207,9 @@ delete:
} else {
break;
}
- if (found_extent && (root->ref_cows ||
- root == root->fs_info->tree_root)) {
+ if (found_extent &&
+ (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
+ root == root->fs_info->tree_root)) {
btrfs_set_path_blocking(path);
ret = btrfs_free_extent(trans, root, extent_start,
extent_num_bytes, 0,
@@ -4083,7 +4248,8 @@ out:
btrfs_abort_transaction(trans, root, ret);
}
error:
- if (last_size != (u64)-1)
+ if (last_size != (u64)-1 &&
+ root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
btrfs_ordered_update_i_size(inode, last_size, NULL);
btrfs_free_path(path);
return err;
@@ -4203,6 +4369,49 @@ out:
return ret;
}
+static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode,
+ u64 offset, u64 len)
+{
+ struct btrfs_trans_handle *trans;
+ int ret;
+
+ /*
+ * Still need to make sure the inode looks like it's been updated so
+ * that any holes get logged if we fsync.
+ */
+ if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) {
+ BTRFS_I(inode)->last_trans = root->fs_info->generation;
+ BTRFS_I(inode)->last_sub_trans = root->log_transid;
+ BTRFS_I(inode)->last_log_commit = root->last_log_commit;
+ return 0;
+ }
+
+ /*
+ * 1 - for the one we're dropping
+ * 1 - for the one we're adding
+ * 1 - for updating the inode.
+ */
+ trans = btrfs_start_transaction(root, 3);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ btrfs_end_transaction(trans, root);
+ return ret;
+ }
+
+ ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+ 0, 0, len, 0, len, 0, 0, 0);
+ if (ret)
+ btrfs_abort_transaction(trans, root, ret);
+ else
+ btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction(trans, root);
+ return ret;
+}
+
/*
* This function puts in dummy file extents for the area we're creating a hole
* for. So if we are truncating this file to a larger size we need to insert
@@ -4211,7 +4420,6 @@ out:
*/
int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
{
- struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map *em = NULL;
@@ -4266,31 +4474,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
struct extent_map *hole_em;
hole_size = last_byte - cur_offset;
- trans = btrfs_start_transaction(root, 3);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- break;
- }
-
- err = btrfs_drop_extents(trans, root, inode,
- cur_offset,
- cur_offset + hole_size, 1);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
- break;
- }
-
- err = btrfs_insert_file_extent(trans, root,
- btrfs_ino(inode), cur_offset, 0,
- 0, hole_size, 0, hole_size,
- 0, 0, 0);
- if (err) {
- btrfs_abort_transaction(trans, root, err);
- btrfs_end_transaction(trans, root);
+ err = maybe_insert_hole(root, inode, cur_offset,
+ hole_size);
+ if (err)
break;
- }
-
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
@@ -4309,7 +4496,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->ram_bytes = hole_size;
hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
- hole_em->generation = trans->transid;
+ hole_em->generation = root->fs_info->generation;
while (1) {
write_lock(&em_tree->lock);
@@ -4322,17 +4509,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_size - 1, 0);
}
free_extent_map(hole_em);
-next:
- btrfs_update_inode(trans, root, inode);
- btrfs_end_transaction(trans, root);
}
+next:
free_extent_map(em);
em = NULL;
cur_offset = last_byte;
if (cur_offset >= block_end)
break;
}
-
free_extent_map(em);
unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
GFP_NOFS);
@@ -4354,8 +4538,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, newsize);
@@ -4464,12 +4652,76 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
err = btrfs_dirty_inode(inode);
if (!err && attr->ia_valid & ATTR_MODE)
- err = btrfs_acl_chmod(inode);
+ err = posix_acl_chmod(inode, inode->i_mode);
}
return err;
}
+/*
+ * While truncating the inode pages during eviction, we get the VFS calling
+ * btrfs_invalidatepage() against each page of the inode. This is slow because
+ * the calls to btrfs_invalidatepage() result in a huge amount of calls to
+ * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
+ * extent_state structures over and over, wasting lots of time.
+ *
+ * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
+ * those expensive operations on a per page basis and do only the ordered io
+ * finishing, while we release here the extent_map and extent_state structures,
+ * without the excessive merging and splitting.
+ */
+static void evict_inode_truncate_pages(struct inode *inode)
+{
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+ struct rb_node *node;
+
+ ASSERT(inode->i_state & I_FREEING);
+ truncate_inode_pages_final(&inode->i_data);
+
+ write_lock(&map_tree->lock);
+ while (!RB_EMPTY_ROOT(&map_tree->map)) {
+ struct extent_map *em;
+
+ node = rb_first(&map_tree->map);
+ em = rb_entry(node, struct extent_map, rb_node);
+ clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
+ remove_extent_mapping(map_tree, em);
+ free_extent_map(em);
+ if (need_resched()) {
+ write_unlock(&map_tree->lock);
+ cond_resched();
+ write_lock(&map_tree->lock);
+ }
+ }
+ write_unlock(&map_tree->lock);
+
+ spin_lock(&io_tree->lock);
+ while (!RB_EMPTY_ROOT(&io_tree->state)) {
+ struct extent_state *state;
+ struct extent_state *cached_state = NULL;
+
+ node = rb_first(&io_tree->state);
+ state = rb_entry(node, struct extent_state, rb_node);
+ atomic_inc(&state->refs);
+ spin_unlock(&io_tree->lock);
+
+ lock_extent_bits(io_tree, state->start, state->end,
+ 0, &cached_state);
+ clear_extent_bit(io_tree, state->start, state->end,
+ EXTENT_LOCKED | EXTENT_DIRTY |
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
+ free_extent_state(state);
+
+ cond_resched();
+ spin_lock(&io_tree->lock);
+ }
+ spin_unlock(&io_tree->lock);
+}
+
void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
@@ -4480,7 +4732,8 @@ void btrfs_evict_inode(struct inode *inode)
trace_btrfs_inode_evict(inode);
- truncate_inode_pages(&inode->i_data, 0);
+ evict_inode_truncate_pages(inode);
+
if (inode->i_nlink &&
((btrfs_root_refs(&root->root_item) != 0 &&
root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -4655,9 +4908,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,
}
err = -ENOENT;
- ret = btrfs_find_root_ref(root->fs_info->tree_root, path,
- BTRFS_I(dir)->root->root_key.objectid,
- location->objectid);
+ ret = btrfs_find_item(root->fs_info->tree_root, path,
+ BTRFS_I(dir)->root->root_key.objectid,
+ location->objectid, BTRFS_ROOT_REF_KEY, NULL);
if (ret) {
if (ret < 0)
err = ret;
@@ -4761,7 +5014,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root)
struct inode *inode;
u64 objectid = 0;
- WARN_ON(btrfs_root_refs(&root->root_item) != 0);
+ if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
+ WARN_ON(btrfs_root_refs(&root->root_item) != 0);
spin_lock(&root->inode_lock);
again:
@@ -4818,7 +5072,9 @@ again:
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
struct btrfs_iget_args *args = p;
- inode->i_ino = args->ino;
+ inode->i_ino = args->location->objectid;
+ memcpy(&BTRFS_I(inode)->location, args->location,
+ sizeof(*args->location));
BTRFS_I(inode)->root = args->root;
return 0;
}
@@ -4826,19 +5082,19 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
static int btrfs_find_actor(struct inode *inode, void *opaque)
{
struct btrfs_iget_args *args = opaque;
- return args->ino == btrfs_ino(inode) &&
+ return args->location->objectid == BTRFS_I(inode)->location.objectid &&
args->root == BTRFS_I(inode)->root;
}
static struct inode *btrfs_iget_locked(struct super_block *s,
- u64 objectid,
+ struct btrfs_key *location,
struct btrfs_root *root)
{
struct inode *inode;
struct btrfs_iget_args args;
- unsigned long hashval = btrfs_inode_hash(objectid, root);
+ unsigned long hashval = btrfs_inode_hash(location->objectid, root);
- args.ino = objectid;
+ args.location = location;
args.root = root;
inode = iget5_locked(s, hashval, btrfs_find_actor,
@@ -4855,13 +5111,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
{
struct inode *inode;
- inode = btrfs_iget_locked(s, location->objectid, root);
+ inode = btrfs_iget_locked(s, location, root);
if (!inode)
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
if (!is_bad_inode(inode)) {
inode_tree_add(inode);
@@ -4917,7 +5171,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
return ERR_PTR(ret);
if (location.objectid == 0)
- return NULL;
+ return ERR_PTR(-ENOENT);
if (location.type == BTRFS_INODE_ITEM_KEY) {
inode = btrfs_iget(dir->i_sb, &location, root, NULL);
@@ -4948,6 +5202,42 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
iput(inode);
inode = ERR_PTR(ret);
}
+ /*
+ * If orphan cleanup did remove any orphans, it means the tree
+ * was modified and therefore the commit root is not the same as
+ * the current root anymore. This is a problem, because send
+ * uses the commit root and therefore can see inode items that
+ * don't exist in the current root anymore, and for example make
+ * calls to btrfs_iget, which will do tree lookups based on the
+ * current root and not on the commit root. Those lookups will
+ * fail, returning a -ESTALE error, and making send fail with
+ * that error. So make sure a send does not see any orphans we
+ * have just removed, and that it will see the same inodes
+ * regardless of whether a transaction commit happened before
+ * it started (meaning that the commit root will be the same as
+ * the current root) or not.
+ */
+ if (sub_root->node != sub_root->commit_root) {
+ u64 sub_flags = btrfs_root_flags(&sub_root->root_item);
+
+ if (sub_flags & BTRFS_ROOT_SUBVOL_RDONLY) {
+ struct extent_buffer *eb;
+
+ /*
+ * Assert we can't have races between dentry
+ * lookup called through the snapshot creation
+ * ioctl and the VFS.
+ */
+ ASSERT(mutex_is_locked(&dir->i_mutex));
+
+ down_write(&root->fs_info->commit_root_sem);
+ eb = sub_root->commit_root;
+ sub_root->commit_root =
+ btrfs_root_node(sub_root);
+ up_write(&root->fs_info->commit_root_sem);
+ free_extent_buffer(eb);
+ }
+ }
}
return inode;
@@ -4974,17 +5264,23 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
static void btrfs_dentry_release(struct dentry *dentry)
{
- if (dentry->d_fsdata)
- kfree(dentry->d_fsdata);
+ kfree(dentry->d_fsdata);
}
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
- struct dentry *ret;
+ struct inode *inode;
- ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
- return ret;
+ inode = btrfs_lookup_dentry(dir, dentry);
+ if (IS_ERR(inode)) {
+ if (PTR_ERR(inode) == -ENOENT)
+ inode = NULL;
+ else
+ return ERR_CAST(inode);
+ }
+
+ return d_materialise_unique(dentry, inode);
}
unsigned char btrfs_filetype_table[] = {
@@ -5338,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
return ret;
}
+static int btrfs_insert_inode_locked(struct inode *inode)
+{
+ struct btrfs_iget_args args;
+ args.location = &BTRFS_I(inode)->location;
+ args.root = BTRFS_I(inode)->root;
+
+ return insert_inode_locked4(inode,
+ btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root),
+ btrfs_find_actor, &args);
+}
+
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir,
@@ -5352,9 +5659,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode_ref *ref;
struct btrfs_key key[2];
u32 sizes[2];
+ int nitems = name ? 2 : 1;
unsigned long ptr;
int ret;
- int owner;
path = btrfs_alloc_path();
if (!path)
@@ -5367,12 +5674,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
}
/*
+ * O_TMPFILE, set link count to 0, so that after this point,
+ * we fill in an inode item with the correct link count.
+ */
+ if (!name)
+ set_nlink(inode, 0);
+
+ /*
* we have to initialize this early, so we can reclaim the inode
* number if we fail afterwards in this function.
*/
inode->i_ino = objectid;
- if (dir) {
+ if (dir && name) {
trace_btrfs_inode_request(dir);
ret = btrfs_set_inode_index(dir, index);
@@ -5381,6 +5695,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
iput(inode);
return ERR_PTR(ret);
}
+ } else if (dir) {
+ *index = 0;
}
/*
* index_cnt is ignored for everything but a dir,
@@ -5388,6 +5704,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
* number
*/
BTRFS_I(inode)->index_cnt = 2;
+ BTRFS_I(inode)->dir_index = *index;
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
@@ -5400,32 +5717,39 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
*/
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
- if (S_ISDIR(mode))
- owner = 0;
- else
- owner = 1;
-
key[0].objectid = objectid;
btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
key[0].offset = 0;
- /*
- * Start new inodes with an inode_ref. This is slightly more
- * efficient for small numbers of hard links since they will
- * be packed into one item. Extended refs will kick in if we
- * add more hard links than can fit in the ref item.
- */
- key[1].objectid = objectid;
- btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
- key[1].offset = ref_objectid;
-
sizes[0] = sizeof(struct btrfs_inode_item);
- sizes[1] = name_len + sizeof(*ref);
+
+ if (name) {
+ /*
+ * Start new inodes with an inode_ref. This is slightly more
+ * efficient for small numbers of hard links since they will
+ * be packed into one item. Extended refs will kick in if we
+ * add more hard links than can fit in the ref item.
+ */
+ key[1].objectid = objectid;
+ btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
+ key[1].offset = ref_objectid;
+
+ sizes[1] = name_len + sizeof(*ref);
+ }
+
+ location = &BTRFS_I(inode)->location;
+ location->objectid = objectid;
+ location->offset = 0;
+ btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+
+ ret = btrfs_insert_inode_locked(inode);
+ if (ret < 0)
+ goto fail;
path->leave_spinning = 1;
- ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
+ ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
if (ret != 0)
- goto fail;
+ goto fail_unlock;
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
@@ -5436,21 +5760,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
sizeof(*inode_item));
fill_inode_item(trans, path->nodes[0], inode_item, inode);
- ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
- struct btrfs_inode_ref);
- btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
- btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
- ptr = (unsigned long)(ref + 1);
- write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ if (name) {
+ ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
+ struct btrfs_inode_ref);
+ btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
+ btrfs_set_inode_ref_index(path->nodes[0], ref, *index);
+ ptr = (unsigned long)(ref + 1);
+ write_extent_buffer(path->nodes[0], name, ptr, name_len);
+ }
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_free_path(path);
- location = &BTRFS_I(inode)->location;
- location->objectid = objectid;
- location->offset = 0;
- btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-
btrfs_inherit_iflags(inode, dir);
if (S_ISREG(mode)) {
@@ -5461,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_INODE_NODATASUM;
}
- btrfs_insert_inode_hash(inode);
inode_tree_add(inode);
trace_btrfs_inode_new(inode);
@@ -5469,9 +5789,18 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_update_root_times(trans, root);
+ ret = btrfs_inode_inherit_props(trans, inode, dir);
+ if (ret)
+ btrfs_err(root->fs_info,
+ "error inheriting props for ino %llu (root %llu): %d",
+ btrfs_ino(inode), root->root_key.objectid, ret);
+
return inode;
+
+fail_unlock:
+ unlock_new_inode(inode);
fail:
- if (dir)
+ if (dir && name)
BTRFS_I(dir)->index_cnt--;
btrfs_free_path(path);
iput(inode);
@@ -5604,36 +5933,43 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
* if the filesystem supports xattrs by looking at the
* ops vector.
*/
-
inode->i_op = &btrfs_special_inode_operations;
- err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ init_special_inode(inode, inode->i_mode, rdev);
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- drop_inode = 1;
- else {
- init_special_inode(inode, inode->i_mode, rdev);
+ goto out_unlock_inode;
+
+ err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+ if (err) {
+ goto out_unlock_inode;
+ } else {
btrfs_update_inode(trans, root, inode);
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
}
+
out_unlock:
btrfs_end_transaction(trans, root);
+ btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
return err;
+
+out_unlock_inode:
+ drop_inode = 1;
+ unlock_new_inode(inode);
+ goto out_unlock;
+
}
static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -5668,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
drop_inode_on_err = 1;
-
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err)
- goto out_unlock;
-
- err = btrfs_update_inode(trans, root, inode);
- if (err)
- goto out_unlock;
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -5685,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
*/
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+ if (err)
+ goto out_unlock_inode;
+
+ err = btrfs_update_inode(trans, root, inode);
+ if (err)
+ goto out_unlock_inode;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- goto out_unlock;
+ goto out_unlock_inode;
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ unlock_new_inode(inode);
d_instantiate(dentry, inode);
out_unlock:
@@ -5701,8 +6037,14 @@ out_unlock:
inode_dec_link_count(inode);
iput(inode);
}
+ btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
return err;
+
+out_unlock_inode:
+ unlock_new_inode(inode);
+ goto out_unlock;
+
}
static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -5737,6 +6079,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
goto fail;
}
+ /* There are several dir indexes for this inode, clear the cache. */
+ BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
@@ -5752,11 +6096,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
err = btrfs_update_inode(trans, root, inode);
if (err)
goto fail;
+ if (inode->i_nlink == 1) {
+ /*
+ * If new hard link count is 1, it's a file created
+ * with open(2) O_TMPFILE flag.
+ */
+ err = btrfs_orphan_del(trans, inode);
+ if (err)
+ goto fail;
+ }
d_instantiate(dentry, inode);
btrfs_log_new_name(trans, inode, NULL, parent);
}
btrfs_end_transaction(trans, root);
+ btrfs_balance_delayed_items(root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
@@ -5798,33 +6152,43 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
}
drop_on_err = 1;
+ /* these must be set before we unlock the inode */
+ inode->i_op = &btrfs_dir_inode_operations;
+ inode->i_fop = &btrfs_dir_file_operations;
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
if (err)
- goto out_fail;
-
- inode->i_op = &btrfs_dir_inode_operations;
- inode->i_fop = &btrfs_dir_file_operations;
+ goto out_fail_inode;
btrfs_i_size_write(inode, 0);
err = btrfs_update_inode(trans, root, inode);
if (err)
- goto out_fail;
+ goto out_fail_inode;
err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
dentry->d_name.len, 0, index);
if (err)
- goto out_fail;
+ goto out_fail_inode;
d_instantiate(dentry, inode);
+ /*
+ * mkdir is special. We're unlocking after we call d_instantiate
+ * to avoid a race with nfsd calling d_instantiate.
+ */
+ unlock_new_inode(inode);
drop_on_err = 0;
out_fail:
btrfs_end_transaction(trans, root);
if (drop_on_err)
iput(inode);
+ btrfs_balance_delayed_items(root);
btrfs_btree_balance_dirty(root);
return err;
+
+out_fail_inode:
+ unlock_new_inode(inode);
+ goto out_fail;
}
/* helper for btfs_get_extent. Given an existing extent in the tree,
@@ -5834,14 +6198,14 @@ out_fail:
static int merge_extent_mapping(struct extent_map_tree *em_tree,
struct extent_map *existing,
struct extent_map *em,
- u64 map_start, u64 map_len)
+ u64 map_start)
{
u64 start_diff;
BUG_ON(map_start < em->start || map_start >= extent_map_end(em));
start_diff = map_start - em->start;
em->start = map_start;
- em->len = map_len;
+ em->len = existing->start - em->start;
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
em->block_start += start_diff;
@@ -5878,16 +6242,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,
max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
- if (ret) {
- char *kaddr = kmap_atomic(page);
- unsigned long copy_size = min_t(u64,
- PAGE_CACHE_SIZE - pg_offset,
- max_size - extent_offset);
- memset(kaddr + pg_offset, 0, copy_size);
- kunmap_atomic(kaddr);
- }
kfree(tmp);
- return 0;
+ return ret;
}
/*
@@ -5905,7 +6261,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
{
int ret;
int err = 0;
- u64 bytenr;
u64 extent_start = 0;
u64 extent_end = 0;
u64 objectid = btrfs_ino(inode);
@@ -5919,7 +6274,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_trans_handle *trans = NULL;
- int compress_type;
+ const bool new_inline = !page || create;
again:
read_lock(&em_tree->lock);
@@ -5993,14 +6348,13 @@ again:
found_type = btrfs_file_extent_type(leaf, item);
extent_start = found_key.offset;
- compress_type = btrfs_file_extent_compression(leaf, item);
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, item);
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
- size = btrfs_file_extent_inline_len(leaf, item);
+ size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
next:
@@ -6022,38 +6376,18 @@ next:
goto not_found;
if (start + len <= found_key.offset)
goto not_found;
+ if (start > found_key.offset)
+ goto next;
em->start = start;
em->orig_start = start;
em->len = found_key.offset - start;
goto not_found_em;
}
- em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item);
+ btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em);
+
if (found_type == BTRFS_FILE_EXTENT_REG ||
found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- em->start = extent_start;
- em->len = extent_end - extent_start;
- em->orig_start = extent_start -
- btrfs_file_extent_offset(leaf, item);
- em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
- item);
- bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
- if (bytenr == 0) {
- em->block_start = EXTENT_MAP_HOLE;
- goto insert;
- }
- if (compress_type != BTRFS_COMPRESS_NONE) {
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->compress_type = compress_type;
- em->block_start = bytenr;
- em->block_len = em->orig_block_len;
- } else {
- bytenr += btrfs_file_extent_offset(leaf, item);
- em->block_start = bytenr;
- em->block_len = em->len;
- if (found_type == BTRFS_FILE_EXTENT_PREALLOC)
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
- }
goto insert;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
unsigned long ptr;
@@ -6062,14 +6396,10 @@ next:
size_t extent_offset;
size_t copy_size;
- em->block_start = EXTENT_MAP_INLINE;
- if (!page || create) {
- em->start = extent_start;
- em->len = extent_end - extent_start;
+ if (new_inline)
goto out;
- }
- size = btrfs_file_extent_inline_len(leaf, item);
+ size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_offset = page_offset(page) + pg_offset - extent_start;
copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
size - extent_offset);
@@ -6077,10 +6407,6 @@ next:
em->len = ALIGN(copy_size, root->sectorsize);
em->orig_block_len = em->len;
em->orig_start = em->start;
- if (compress_type) {
- set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
- em->compress_type = compress_type;
- }
ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) {
if (btrfs_file_extent_compression(leaf, item) !=
@@ -6088,7 +6414,10 @@ next:
ret = uncompress_inline(path, inode, page,
pg_offset,
extent_offset, item);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret) {
+ err = ret;
+ goto out;
+ }
} else {
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
@@ -6124,8 +6453,6 @@ next:
set_extent_uptodate(io_tree, em->start,
extent_map_end(em) - 1, NULL, GFP_NOFS);
goto insert;
- } else {
- WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
}
not_found:
em->start = start;
@@ -6166,8 +6493,7 @@ insert:
em->len);
if (existing) {
err = merge_extent_mapping(em_tree, existing,
- em, start,
- root->sectorsize);
+ em, start);
free_extent_map(existing);
if (err) {
free_extent_map(em);
@@ -6342,21 +6668,21 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
alloc_hint = get_extent_allocation_hint(inode, start, len);
ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
- alloc_hint, &ins, 1);
+ alloc_hint, &ins, 1, 1);
if (ret)
return ERR_PTR(ret);
em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
ins.offset, ins.offset, ins.offset, 0);
if (IS_ERR(em)) {
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
return em;
}
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
if (ret) {
- btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+ btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);
free_extent_map(em);
return ERR_PTR(ret);
}
@@ -6377,6 +6703,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
int ret;
struct extent_buffer *leaf;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
u64 disk_bytenr;
@@ -6386,6 +6713,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
int slot;
int found_type;
bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW);
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -6429,6 +6757,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
if (!nocow && found_type == BTRFS_FILE_EXTENT_REG)
goto out;
+ extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+ if (extent_end <= offset)
+ goto out;
+
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (disk_bytenr == 0)
goto out;
@@ -6446,10 +6778,22 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
}
- extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
-
if (btrfs_extent_readonly(root, disk_bytenr))
goto out;
+
+ num_bytes = min(offset + *len, extent_end) - offset;
+ if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ u64 range_end;
+
+ range_end = round_up(offset + num_bytes, root->sectorsize) - 1;
+ ret = test_range_bit(io_tree, offset, range_end,
+ EXTENT_DELALLOC, 0, NULL);
+ if (ret) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+
btrfs_release_path(path);
/*
@@ -6478,7 +6822,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
*/
disk_bytenr += backref_offset;
disk_bytenr += offset - key.offset;
- num_bytes = min(offset + *len, extent_end) - offset;
if (csum_exist_in_range(root, disk_bytenr, num_bytes))
goto out;
/*
@@ -6492,6 +6835,76 @@ out:
return ret;
}
+bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
+{
+ struct radix_tree_root *root = &inode->i_mapping->page_tree;
+ int found = false;
+ void **pagep = NULL;
+ struct page *page = NULL;
+ int start_idx;
+ int end_idx;
+
+ start_idx = start >> PAGE_CACHE_SHIFT;
+
+ /*
+ * end is the last byte in the last page. end == start is legal
+ */
+ end_idx = end >> PAGE_CACHE_SHIFT;
+
+ rcu_read_lock();
+
+ /* Most of the code in this while loop is lifted from
+ * find_get_page. It's been modified to begin searching from a
+ * page and return just the first page found in that range. If the
+ * found idx is less than or equal to the end idx then we know that
+ * a page exists. If no pages are found or if those pages are
+ * outside of the range then we're fine (yay!) */
+ while (page == NULL &&
+ radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) {
+ page = radix_tree_deref_slot(pagep);
+ if (unlikely(!page))
+ break;
+
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ page = NULL;
+ continue;
+ }
+ /*
+ * Otherwise, shmem/tmpfs must be storing a swap entry
+ * here as an exceptional entry: so return it without
+ * attempting to raise page count.
+ */
+ page = NULL;
+ break; /* TODO: Is this relevant for this use case? */
+ }
+
+ if (!page_cache_get_speculative(page)) {
+ page = NULL;
+ continue;
+ }
+
+ /*
+ * Has the page moved?
+ * This is part of the lockless pagecache protocol. See
+ * include/linux/pagemap.h for details.
+ */
+ if (unlikely(page != *pagep)) {
+ page_cache_release(page);
+ page = NULL;
+ }
+ }
+
+ if (page) {
+ if (page->index <= end_idx)
+ found = true;
+ page_cache_release(page);
+ }
+
+ rcu_read_unlock();
+ return found;
+}
+
static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
struct extent_state **cached_state, int writing)
{
@@ -6516,10 +6929,9 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* invalidate needs to happen so that reads after a write do not
* get stale data.
*/
- if (!ordered && (!writing ||
- !test_range_bit(&BTRFS_I(inode)->io_tree,
- lockstart, lockend, EXTENT_UPTODATE, 0,
- *cached_state)))
+ if (!ordered &&
+ (!writing ||
+ !btrfs_page_exists_in_range(inode, lockstart, lockend)))
break;
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
@@ -6779,17 +7191,16 @@ unlock_err:
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
struct btrfs_dio_private *dip = bio->bi_private;
- struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
- struct bio_vec *bvec = bio->bi_io_vec;
+ struct bio_vec *bvec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *dio_bio;
u32 *csums = (u32 *)dip->csum;
- int index = 0;
u64 start;
+ int i;
start = dip->logical_offset;
- do {
+ bio_for_each_segment_all(bvec, bio, i) {
if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
struct page *page = bvec->bv_page;
char *kaddr;
@@ -6805,18 +7216,16 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
local_irq_restore(flags);
flush_dcache_page(bvec->bv_page);
- if (csum != csums[index]) {
+ if (csum != csums[i]) {
btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
btrfs_ino(inode), start, csum,
- csums[index]);
+ csums[i]);
err = -EIO;
}
}
start += bvec->bv_len;
- bvec++;
- index++;
- } while (bvec <= bvec_end);
+ }
unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
@@ -6851,10 +7260,10 @@ again:
if (!ret)
goto out_test;
- ordered->work.func = finish_ordered_fn;
- ordered->work.flags = 0;
- btrfs_queue_worker(&root->fs_info->endio_write_workers,
- &ordered->work);
+ btrfs_init_work(&ordered->work, btrfs_endio_write_helper,
+ finish_ordered_fn, NULL, NULL);
+ btrfs_queue_work(root->fs_info->endio_write_workers,
+ &ordered->work);
out_test:
/*
* our bio might span multiple ordered extents. If we haven't
@@ -6894,17 +7303,18 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
struct btrfs_dio_private *dip = bio->bi_private;
if (err) {
- printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
- "sector %#Lx len %u err no %d\n",
+ btrfs_err(BTRFS_I(dip->inode)->root->fs_info,
+ "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",
btrfs_ino(dip->inode), bio->bi_rw,
- (unsigned long long)bio->bi_sector, bio->bi_size, err);
+ (unsigned long long)bio->bi_iter.bi_sector,
+ bio->bi_iter.bi_size, err);
dip->errors = 1;
/*
* before atomic variable goto zero, we must make sure
* dip->errors is perceived to be set.
*/
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
}
/* if there are more bios still pending for this dio, just exit */
@@ -6988,7 +7398,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
struct bio_vec *bvec = orig_bio->bi_io_vec;
- u64 start_sector = orig_bio->bi_sector;
+ u64 start_sector = orig_bio->bi_iter.bi_sector;
u64 file_offset = dip->logical_offset;
u64 submit_len = 0;
u64 map_length;
@@ -6996,15 +7406,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int ret = 0;
int async_submit = 0;
- map_length = orig_bio->bi_size;
+ map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
&map_length, NULL, 0);
- if (ret) {
- bio_put(orig_bio);
+ if (ret)
return -EIO;
- }
- if (map_length >= orig_bio->bi_size) {
+ if (map_length >= orig_bio->bi_iter.bi_size) {
bio = orig_bio;
goto submit;
}
@@ -7019,6 +7427,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
if (!bio)
return -ENOMEM;
+
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
atomic_inc(&dip->pending_bios);
@@ -7056,7 +7465,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_private = dip;
bio->bi_end_io = btrfs_end_dio_bio;
- map_length = orig_bio->bi_size;
+ map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw,
start_sector << 9,
&map_length, NULL, 0);
@@ -7084,7 +7493,7 @@ out_err:
* before atomic variable goto zero, we must
* make sure dip->errors is perceived to be set.
*/
- smp_mb__before_atomic_dec();
+ smp_mb__before_atomic();
if (atomic_dec_and_test(&dip->pending_bios))
bio_io_error(dip->orig_bio);
@@ -7114,7 +7523,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
if (!skip_sum && !write) {
csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
- sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+ sum_len = dio_bio->bi_iter.bi_size >>
+ inode->i_sb->s_blocksize_bits;
sum_len *= csum_size;
} else {
sum_len = 0;
@@ -7129,8 +7539,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
dip->private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;
- dip->bytes = dio_bio->bi_size;
- dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+ dip->bytes = dio_bio->bi_iter.bi_size;
+ dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
io_bio->bi_private = dip;
dip->errors = 0;
dip->orig_bio = io_bio;
@@ -7160,7 +7570,7 @@ free_ordered:
if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
btrfs_free_reserved_extent(root, ordered->start,
- ordered->disk_len);
+ ordered->disk_len, 1);
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
@@ -7168,39 +7578,30 @@ free_ordered:
}
static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
+ const struct iov_iter *iter, loff_t offset)
{
int seg;
int i;
- size_t size;
- unsigned long addr;
unsigned blocksize_mask = root->sectorsize - 1;
ssize_t retval = -EINVAL;
- loff_t end = offset;
if (offset & blocksize_mask)
goto out;
- /* Check the memory alignment. Blocks cannot straddle pages */
- for (seg = 0; seg < nr_segs; seg++) {
- addr = (unsigned long)iov[seg].iov_base;
- size = iov[seg].iov_len;
- end += size;
- if ((addr & blocksize_mask) || (size & blocksize_mask))
- goto out;
-
- /* If this is a write we don't need to check anymore */
- if (rw & WRITE)
- continue;
+ if (iov_iter_alignment(iter) & blocksize_mask)
+ goto out;
- /*
- * Check to make sure we don't have duplicate iov_base's in this
- * iovec, if so return EINVAL, otherwise we'll get csum errors
- * when reading back.
- */
- for (i = seg + 1; i < nr_segs; i++) {
- if (iov[seg].iov_base == iov[i].iov_base)
+ /* If this is a write we don't need to check anymore */
+ if (rw & WRITE)
+ return 0;
+ /*
+ * Check to make sure we don't have duplicate iov_base's in this
+ * iovec, if so return EINVAL, otherwise we'll get csum errors
+ * when reading back.
+ */
+ for (seg = 0; seg < iter->nr_segs; seg++) {
+ for (i = seg + 1; i < iter->nr_segs; i++) {
+ if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
goto out;
}
}
@@ -7210,8 +7611,7 @@ out:
}
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
- const struct iovec *iov, loff_t offset,
- unsigned long nr_segs)
+ struct iov_iter *iter, loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
@@ -7221,23 +7621,23 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
bool relock = false;
ssize_t ret;
- if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
- offset, nr_segs))
+ if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))
return 0;
atomic_inc(&inode->i_dio_count);
- smp_mb__after_atomic_inc();
+ smp_mb__after_atomic();
/*
- * The generic stuff only does filemap_write_and_wait_range, which isn't
- * enough if we've written compressed pages to this area, so we need to
- * call btrfs_wait_ordered_range to make absolutely sure that any
- * outstanding dirty pages are on disk.
+ * The generic stuff only does filemap_write_and_wait_range, which
+ * isn't enough if we've written compressed pages to this area, so
+ * we need to flush the dirty pages again to make absolutely sure
+ * that any outstanding dirty pages are on disk.
*/
- count = iov_length(iov, nr_segs);
- ret = btrfs_wait_ordered_range(inode, offset, count);
- if (ret)
- return ret;
+ count = iov_iter_count(iter);
+ if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+ &BTRFS_I(inode)->runtime_flags))
+ filemap_fdatawrite_range(inode->i_mapping, offset,
+ offset + count - 1);
if (rw & WRITE) {
/*
@@ -7261,7 +7661,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
ret = __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
- iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+ iter, offset, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags);
if (rw & WRITE) {
if (ret < 0 && ret != -EIOCBQUEUED)
@@ -7367,6 +7767,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
struct extent_state *cached_state = NULL;
u64 page_start = page_offset(page);
u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+ int inode_evicting = inode->i_state & I_FREEING;
/*
* we have the page locked, so new writeback can't start,
@@ -7382,17 +7783,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_releasepage(page, GFP_NOFS);
return;
}
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+
+ if (!inode_evicting)
+ lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) {
/*
* IO on this page will never be started, so we need
* to account for any ordered extents now
*/
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+ if (!inode_evicting)
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_DIRTY | EXTENT_DELALLOC |
+ EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 0, &cached_state,
+ GFP_NOFS);
/*
* whoever cleared the private bit is responsible
* for the finish_ordered_io
@@ -7416,14 +7821,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
- cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+ if (!inode_evicting) {
+ cached_state = NULL;
+ lock_extent_bits(tree, page_start, page_end, 0,
+ &cached_state);
+ }
+ }
+
+ if (!inode_evicting) {
+ clear_extent_bit(tree, page_start, page_end,
+ EXTENT_LOCKED | EXTENT_DIRTY |
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, 1, 1,
+ &cached_state, GFP_NOFS);
+
+ __btrfs_releasepage(page, GFP_NOFS);
}
- clear_extent_bit(tree, page_start, page_end,
- EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
- &cached_state, GFP_NOFS);
- __btrfs_releasepage(page, GFP_NOFS);
ClearPageChecked(page);
if (PagePrivate(page)) {
@@ -7641,27 +8054,6 @@ static int btrfs_truncate(struct inode *inode)
BUG_ON(ret);
/*
- * setattr is responsible for setting the ordered_data_close flag,
- * but that is only tested during the last file release. That
- * could happen well after the next commit, leaving a great big
- * window where new writes may get lost if someone chooses to write
- * to this file after truncating to zero
- *
- * The inode doesn't have any dirty data here, and so if we commit
- * this is a noop. If someone immediately starts writing to the inode
- * it is very likely we'll catch some of their writes in this
- * transaction, and the commit will find this file on the ordered
- * data list with good things to send down.
- *
- * This is a best effort solution, there is still a window where
- * using truncate to replace the contents of the file will
- * end up with a zero length file after a crash.
- */
- if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
- &BTRFS_I(inode)->runtime_flags))
- btrfs_add_ordered_operation(trans, root, inode);
-
- /*
* So if we truncate and then write and fsync we normally would just
* write the extents that changed, which is a problem if we need to
* first truncate that entire inode. So set this flag so we write out
@@ -7733,7 +8125,9 @@ out:
* create a new subvolume directory/inode (helper for the ioctl).
*/
int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *new_root, u64 new_dirid)
+ struct btrfs_root *new_root,
+ struct btrfs_root *parent_root,
+ u64 new_dirid)
{
struct inode *inode;
int err;
@@ -7750,6 +8144,13 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
set_nlink(inode, 1);
btrfs_i_size_write(inode, 0);
+ unlock_new_inode(inode);
+
+ err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
+ if (err)
+ btrfs_err(new_root->fs_info,
+ "error inheriting subvolume %llu properties: %d",
+ new_root->root_key.objectid, err);
err = btrfs_update_inode(trans, new_root, inode);
@@ -7776,6 +8177,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->flags = 0;
ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
+ ei->dir_index = 0;
ei->last_unlink_trans = 0;
ei->last_log_commit = 0;
@@ -7799,7 +8201,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
- INIT_LIST_HEAD(&ei->ordered_operations);
RB_CLEAR_NODE(&ei->rb_node);
return inode;
@@ -7839,17 +8240,6 @@ void btrfs_destroy_inode(struct inode *inode)
if (!root)
goto free;
- /*
- * Make sure we're properly removed from the ordered operation
- * lists.
- */
- smp_mb();
- if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
- spin_lock(&root->fs_info->ordered_root_lock);
- list_del_init(&BTRFS_I(inode)->ordered_operations);
- spin_unlock(&root->fs_info->ordered_root_lock);
- }
-
if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
&BTRFS_I(inode)->runtime_flags)) {
btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8031,12 +8421,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
ret = 0;
/*
- * we're using rename to replace one file with another.
- * and the replacement file is large. Start IO on it now so
- * we don't add too much work to the end of the transaction
+ * we're using rename to replace one file with another. Start IO on it
+ * now so we don't add too much work to the end of the transaction
*/
- if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
- old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+ if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
filemap_flush(old_inode->i_mapping);
/* close the racy window with snapshot create/destroy ioctl */
@@ -8063,9 +8451,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (ret)
goto out_fail;
+ BTRFS_I(old_inode)->dir_index = 0ULL;
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
/* force full log commit if subvolume involved. */
- root->fs_info->last_trans_log_full_commit = trans->transid;
+ btrfs_set_log_full_commit(root->fs_info, trans);
} else {
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
@@ -8083,12 +8472,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
btrfs_pin_log_trans(root);
}
- /*
- * make sure the inode gets flushed if it is replacing
- * something.
- */
- if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
- btrfs_add_ordered_operation(trans, root, old_inode);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
@@ -8151,6 +8534,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out_fail;
}
+ if (old_inode->i_nlink == 1)
+ BTRFS_I(old_inode)->dir_index = index;
+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
@@ -8165,6 +8551,16 @@ out_notrans:
return ret;
}
+static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ if (flags & ~RENAME_NOREPLACE)
+ return -EINVAL;
+
+ return btrfs_rename(old_dir, old_dentry, new_dir, new_dentry);
+}
+
static void btrfs_run_delalloc_work(struct btrfs_work *work)
{
struct btrfs_delalloc_work *delalloc_work;
@@ -8203,7 +8599,9 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
work->inode = inode;
work->wait = wait;
work->delay_iput = delay_iput;
- work->work.func = btrfs_run_delalloc_work;
+ WARN_ON_ONCE(!inode);
+ btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
+ btrfs_run_delalloc_work, NULL, NULL);
return work;
}
@@ -8218,7 +8616,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
+ int nr)
{
struct btrfs_inode *binode;
struct inode *inode;
@@ -8230,6 +8629,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
INIT_LIST_HEAD(&works);
INIT_LIST_HEAD(&splice);
+ mutex_lock(&root->delalloc_mutex);
spin_lock(&root->delalloc_lock);
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
@@ -8255,19 +8655,16 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
goto out;
}
list_add_tail(&work->list, &works);
- btrfs_queue_worker(&root->fs_info->flush_workers,
- &work->work);
-
+ btrfs_queue_work(root->fs_info->flush_workers,
+ &work->work);
+ ret++;
+ if (nr != -1 && ret >= nr)
+ goto out;
cond_resched();
spin_lock(&root->delalloc_lock);
}
spin_unlock(&root->delalloc_lock);
- list_for_each_entry_safe(work, next, &works, list) {
- list_del_init(&work->list);
- btrfs_wait_and_free_delalloc_work(work);
- }
- return 0;
out:
list_for_each_entry_safe(work, next, &works, list) {
list_del_init(&work->list);
@@ -8279,6 +8676,7 @@ out:
list_splice_tail(&splice, &root->delalloc_inodes);
spin_unlock(&root->delalloc_lock);
}
+ mutex_unlock(&root->delalloc_mutex);
return ret;
}
@@ -8286,10 +8684,12 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
int ret;
- if (root->fs_info->sb->s_flags & MS_RDONLY)
+ if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
return -EROFS;
- ret = __start_delalloc_inodes(root, delay_iput);
+ ret = __start_delalloc_inodes(root, delay_iput, -1);
+ if (ret > 0)
+ ret = 0;
/*
* the filemap_flush will queue IO into the worker threads, but
* we have to make sure the IO is actually started and that
@@ -8306,20 +8706,22 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
return ret;
}
-int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
+int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput,
+ int nr)
{
struct btrfs_root *root;
struct list_head splice;
int ret;
- if (fs_info->sb->s_flags & MS_RDONLY)
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
return -EROFS;
INIT_LIST_HEAD(&splice);
+ mutex_lock(&fs_info->delalloc_root_mutex);
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
- while (!list_empty(&splice)) {
+ while (!list_empty(&splice) && nr) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
root = btrfs_grab_fs_root(root);
@@ -8328,15 +8730,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
&fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
- ret = __start_delalloc_inodes(root, delay_iput);
+ ret = __start_delalloc_inodes(root, delay_iput, nr);
btrfs_put_fs_root(root);
- if (ret)
+ if (ret < 0)
goto out;
+ if (nr != -1) {
+ nr -= ret;
+ WARN_ON(nr < 0);
+ }
spin_lock(&fs_info->delalloc_root_lock);
}
spin_unlock(&fs_info->delalloc_root_lock);
+ ret = 0;
atomic_inc(&fs_info->async_submit_draining);
while (atomic_read(&fs_info->nr_async_submits) ||
atomic_read(&fs_info->async_delalloc_pages)) {
@@ -8345,13 +8752,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput)
atomic_read(&fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&fs_info->async_submit_draining);
- return 0;
out:
if (!list_empty_careful(&splice)) {
spin_lock(&fs_info->delalloc_root_lock);
list_splice_tail(&splice, &fs_info->delalloc_roots);
spin_unlock(&fs_info->delalloc_root_lock);
}
+ mutex_unlock(&fs_info->delalloc_root_mutex);
return ret;
}
@@ -8398,12 +8805,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
- err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
- goto out_unlock;
- }
-
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -8412,23 +8813,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
*/
inode->i_fop = &btrfs_file_operations;
inode->i_op = &btrfs_file_inode_operations;
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+ err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+ if (err)
+ goto out_unlock_inode;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- }
- if (drop_inode)
- goto out_unlock;
+ goto out_unlock_inode;
path = btrfs_alloc_path();
if (!path) {
err = -ENOMEM;
- drop_inode = 1;
- goto out_unlock;
+ goto out_unlock_inode;
}
key.objectid = btrfs_ino(inode);
key.offset = 0;
@@ -8437,9 +8837,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
err = btrfs_insert_empty_item(trans, root, path, &key,
datasize);
if (err) {
- drop_inode = 1;
btrfs_free_path(path);
- goto out_unlock;
+ goto out_unlock_inode;
}
leaf = path->nodes[0];
ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -8463,12 +8862,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode_set_bytes(inode, name_len);
btrfs_i_size_write(inode, name_len);
err = btrfs_update_inode(trans, root, inode);
- if (err)
+ if (err) {
drop_inode = 1;
+ goto out_unlock_inode;
+ }
+
+ unlock_new_inode(inode);
+ d_instantiate(dentry, inode);
out_unlock:
- if (!err)
- d_instantiate(dentry, inode);
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
@@ -8476,6 +8878,11 @@ out_unlock:
}
btrfs_btree_balance_dirty(root);
return err;
+
+out_unlock_inode:
+ drop_inode = 1;
+ unlock_new_inode(inode);
+ goto out_unlock;
}
static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -8507,7 +8914,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
cur_bytes = max(cur_bytes, min_size);
ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
- *alloc_hint, &ins, 1);
+ *alloc_hint, &ins, 1, 0);
if (ret) {
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -8521,7 +8928,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
BTRFS_FILE_EXTENT_PREALLOC);
if (ret) {
btrfs_free_reserved_extent(root, ins.objectid,
- ins.offset);
+ ins.offset, 0);
btrfs_abort_transaction(trans, root, ret);
if (own_trans)
btrfs_end_transaction(trans, root);
@@ -8631,6 +9038,78 @@ static int btrfs_permission(struct inode *inode, int mask)
return generic_permission(inode, mask);
}
+static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(dir)->root;
+ struct inode *inode = NULL;
+ u64 objectid;
+ u64 index;
+ int ret = 0;
+
+ /*
+ * 5 units required for adding orphan entry
+ */
+ trans = btrfs_start_transaction(root, 5);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ ret = btrfs_find_free_ino(root, &objectid);
+ if (ret)
+ goto out;
+
+ inode = btrfs_new_inode(trans, root, dir, NULL, 0,
+ btrfs_ino(dir), objectid, mode, &index);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
+ goto out;
+ }
+
+ inode->i_fop = &btrfs_file_operations;
+ inode->i_op = &btrfs_file_inode_operations;
+
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+ ret = btrfs_init_inode_security(trans, inode, dir, NULL);
+ if (ret)
+ goto out_inode;
+
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ goto out_inode;
+ ret = btrfs_orphan_add(trans, inode);
+ if (ret)
+ goto out_inode;
+
+ /*
+ * We set number of links to 0 in btrfs_new_inode(), and here we set
+ * it to 1 because d_tmpfile() will issue a warning if the count is 0,
+ * through:
+ *
+ * d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
+ */
+ set_nlink(inode, 1);
+ unlock_new_inode(inode);
+ d_tmpfile(dentry, inode);
+ mark_inode_dirty(inode);
+
+out:
+ btrfs_end_transaction(trans, root);
+ if (ret)
+ iput(inode);
+ btrfs_balance_delayed_items(root);
+ btrfs_btree_balance_dirty(root);
+ return ret;
+
+out_inode:
+ unlock_new_inode(inode);
+ goto out;
+
+}
+
static const struct inode_operations btrfs_dir_inode_operations = {
.getattr = btrfs_getattr,
.lookup = btrfs_lookup,
@@ -8639,7 +9118,7 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.link = btrfs_link,
.mkdir = btrfs_mkdir,
.rmdir = btrfs_rmdir,
- .rename = btrfs_rename,
+ .rename2 = btrfs_rename2,
.symlink = btrfs_symlink,
.setattr = btrfs_setattr,
.mknod = btrfs_mknod,
@@ -8649,12 +9128,15 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
+ .tmpfile = btrfs_tmpfile,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
@@ -8724,6 +9206,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.permission = btrfs_permission,
.fiemap = btrfs_fiemap,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_special_inode_operations = {
@@ -8735,6 +9218,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.get_acl = btrfs_get_acl,
+ .set_acl = btrfs_set_acl,
.update_time = btrfs_update_time,
};
static const struct inode_operations btrfs_symlink_inode_operations = {
@@ -8748,7 +9232,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
- .get_acl = btrfs_get_acl,
.update_time = btrfs_update_time,
};