summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c71
1 files changed, 41 insertions, 30 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d04c82c88418..6862cd7e21a9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -985,6 +985,7 @@ static noinline int cow_file_range(struct inode *inode,
u64 num_bytes;
unsigned long ram_size;
u64 cur_alloc_size = 0;
+ u64 min_alloc_size;
u64 blocksize = fs_info->sectorsize;
struct btrfs_key ins;
struct extent_map *em;
@@ -1035,10 +1036,26 @@ static noinline int cow_file_range(struct inode *inode,
btrfs_drop_extent_cache(BTRFS_I(inode), start,
start + num_bytes - 1, 0);
+ /*
+ * Relocation relies on the relocated extents to have exactly the same
+ * size as the original extents. Normally writeback for relocation data
+ * extents follows a NOCOW path because relocation preallocates the
+ * extents. However, due to an operation such as scrub turning a block
+ * group to RO mode, it may fallback to COW mode, so we must make sure
+ * an extent allocated during COW has exactly the requested size and can
+ * not be split into smaller extents, otherwise relocation breaks and
+ * fails during the stage where it updates the bytenr of file extent
+ * items.
+ */
+ if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ min_alloc_size = num_bytes;
+ else
+ min_alloc_size = fs_info->sectorsize;
+
while (num_bytes > 0) {
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
- fs_info->sectorsize, 0, alloc_hint,
+ min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
if (ret < 0)
goto out_unlock;
@@ -1361,6 +1378,8 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
int *page_started, unsigned long *nr_written)
{
const bool is_space_ino = btrfs_is_free_space_inode(BTRFS_I(inode));
+ const bool is_reloc_ino = (BTRFS_I(inode)->root->root_key.objectid ==
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
const u64 range_bytes = end + 1 - start;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
u64 range_start = start;
@@ -1391,18 +1410,23 @@ static int fallback_to_cow(struct inode *inode, struct page *locked_page,
* data space info, which we incremented in the step above.
*
* If we need to fallback to cow and the inode corresponds to a free
- * space cache inode, we must also increment bytes_may_use of the data
- * space_info for the same reason. Space caches always get a prealloc
+ * space cache inode or an inode of the data relocation tree, we must
+ * also increment bytes_may_use of the data space_info for the same
+ * reason. Space caches and relocated data extents always get a prealloc
* extent for them, however scrub or balance may have set the block
- * group that contains that extent to RO mode.
+ * group that contains that extent to RO mode and therefore force COW
+ * when starting writeback.
*/
count = count_range_bits(io_tree, &range_start, end, range_bytes,
EXTENT_NORESERVE, 0);
- if (count > 0 || is_space_ino) {
- const u64 bytes = is_space_ino ? range_bytes : count;
+ if (count > 0 || is_space_ino || is_reloc_ino) {
+ u64 bytes = count;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_space_info *sinfo = fs_info->data_sinfo;
+ if (is_space_ino || is_reloc_ino)
+ bytes = range_bytes;
+
spin_lock(&sinfo->lock);
btrfs_space_info_update_bytes_may_use(fs_info, sinfo, bytes);
spin_unlock(&sinfo->lock);
@@ -1666,12 +1690,8 @@ out_check:
ret = fallback_to_cow(inode, locked_page, cow_start,
found_key.offset - 1,
page_started, nr_written);
- if (ret) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
+ if (ret)
goto error;
- }
cow_start = (u64)-1;
}
@@ -1687,9 +1707,6 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
- if (nocow)
- btrfs_dec_nocow_writers(fs_info,
- disk_bytenr);
ret = PTR_ERR(em);
goto error;
}
@@ -7865,9 +7882,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.overwrite = 1;
inode_unlock(inode);
relock = true;
- } else if (iocb->ki_flags & IOCB_NOWAIT) {
- ret = -EAGAIN;
- goto out;
}
ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
offset, count);
@@ -8109,20 +8123,17 @@ again:
/*
* Qgroup reserved space handler
* Page here will be either
- * 1) Already written to disk
- * In this case, its reserved space is released from data rsv map
- * and will be freed by delayed_ref handler finally.
- * So even we call qgroup_free_data(), it won't decrease reserved
- * space.
- * 2) Not written to disk
- * This means the reserved space should be freed here. However,
- * if a truncate invalidates the page (by clearing PageDirty)
- * and the page is accounted for while allocating extent
- * in btrfs_check_data_free_space() we let delayed_ref to
- * free the entire extent.
+ * 1) Already written to disk or ordered extent already submitted
+ * Then its QGROUP_RESERVED bit in io_tree is already cleaned.
+ * Qgroup will be handled by its qgroup_record then.
+ * btrfs_qgroup_free_data() call will do nothing here.
+ *
+ * 2) Not written to disk yet
+ * Then btrfs_qgroup_free_data() call will clear the QGROUP_RESERVED
+ * bit of its io_tree, and free the qgroup reserved data space.
+ * Since the IO will never happen for this page.
*/
- if (PageDirty(page))
- btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
+ btrfs_qgroup_free_data(inode, NULL, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |