summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/accessors.c2
-rw-r--r--fs/btrfs/accessors.h2
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/bio.c37
-rw-r--r--fs/btrfs/bio.h3
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/defrag.c12
-rw-r--r--fs/btrfs/delayed-ref.c55
-rw-r--r--fs/btrfs/delayed-ref.h10
-rw-r--r--fs/btrfs/dir-item.c4
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/extent-tree.c26
-rw-r--r--fs/btrfs/extent_io.c31
-rw-r--r--fs/btrfs/extent_map.c38
-rw-r--r--fs/btrfs/file.c34
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/free-space-cache.h6
-rw-r--r--fs/btrfs/inode.c14
-rw-r--r--fs/btrfs/ioctl.c4
-rw-r--r--fs/btrfs/messages.c3
-rw-r--r--fs/btrfs/qgroup.c23
-rw-r--r--fs/btrfs/qgroup.h2
-rw-r--r--fs/btrfs/relocation.c77
-rw-r--r--fs/btrfs/send.c35
-rw-r--r--fs/btrfs/super.c12
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/btrfs/tree-log.c6
-rw-r--r--fs/btrfs/uuid-tree.c2
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/btrfs/volumes.h6
-rw-r--r--fs/btrfs/zoned.c2
33 files changed, 286 insertions, 199 deletions
diff --git a/fs/btrfs/accessors.c b/fs/btrfs/accessors.c
index 79026917db19..e3716516ca38 100644
--- a/fs/btrfs/accessors.c
+++ b/fs/btrfs/accessors.c
@@ -3,7 +3,7 @@
* Copyright (C) 2007 Oracle. All rights reserved.
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "messages.h"
#include "extent_io.h"
#include "fs.h"
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index b2eb9cde2c5d..7a7e0ef69973 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -3,7 +3,7 @@
#ifndef BTRFS_ACCESSORS_H
#define BTRFS_ACCESSORS_H
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/align.h>
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e2f478ecd7fd..f8e1d5b2c512 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -3179,10 +3179,14 @@ void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
btrfs_backref_cleanup_node(cache, node);
}
- cache->last_trans = 0;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; i++)
- ASSERT(list_empty(&cache->pending[i]));
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ while (!list_empty(&cache->pending[i])) {
+ node = list_first_entry(&cache->pending[i],
+ struct btrfs_backref_node,
+ list);
+ btrfs_backref_cleanup_node(cache, node);
+ }
+ }
ASSERT(list_empty(&cache->pending_edge));
ASSERT(list_empty(&cache->useless_node));
ASSERT(list_empty(&cache->changed));
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index fec5c6cde0a7..7e0f9600b80c 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
bbio->end_io = end_io;
bbio->private = private;
atomic_set(&bbio->pending_ios, 1);
+ WRITE_ONCE(bbio->status, BLK_STS_OK);
}
/*
@@ -113,41 +114,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
}
}
-static void btrfs_orig_write_end_io(struct bio *bio);
-
-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
- struct btrfs_bio *orig_bbio)
-{
- /*
- * For writes we tolerate nr_mirrors - 1 write failures, so we can't
- * just blindly propagate a write failure here. Instead increment the
- * error count in the original I/O context so that it is guaranteed to
- * be larger than the error tolerance.
- */
- if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
- struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
- struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
-
- atomic_add(orig_bioc->max_errors, &orig_bioc->error);
- } else {
- orig_bbio->bio.bi_status = bbio->bio.bi_status;
- }
-}
-
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
- if (bbio->bio.bi_status)
- btrfs_bbio_propagate_error(bbio, orig_bbio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
- if (atomic_dec_and_test(&bbio->pending_ios))
+ /*
+ * At this point, bbio always points to the original btrfs_bio. Save
+ * the first error in it.
+ */
+ if (status != BLK_STS_OK)
+ cmpxchg(&bbio->status, BLK_STS_OK, status);
+
+ if (atomic_dec_and_test(&bbio->pending_ios)) {
+ /* Load split bio's error which might be set above. */
+ if (status == BLK_STS_OK)
+ bbio->bio.bi_status = READ_ONCE(bbio->status);
__btrfs_bio_end_io(bbio);
+ }
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index e48612340745..e2fe16074ad6 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -79,6 +79,9 @@ struct btrfs_bio {
/* File system that this I/O operates on. */
struct btrfs_fs_info *fs_info;
+ /* Save the first error status of split bio. */
+ blk_status_t status;
+
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 7980b2e33a92..4423d8b716a5 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -3819,6 +3819,8 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
spin_lock(&cache->lock);
if (cache->ro)
space_info->bytes_readonly += num_bytes;
+ else if (btrfs_is_zoned(cache->fs_info))
+ space_info->bytes_zone_unusable += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 9a4b7c119318..e152fde888fc 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -152,6 +152,7 @@ struct btrfs_inode {
* logged_trans), to access/update delalloc_bytes, new_delalloc_bytes,
* defrag_bytes, disk_i_size, outstanding_extents, csum_bytes and to
* update the VFS' inode number of bytes used.
+ * Also protects setting struct file::private_data.
*/
spinlock_t lock;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 1a44fb9845e3..317a3712270f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -463,6 +463,8 @@ struct btrfs_file_private {
void *filldir_buf;
u64 last_index;
struct extent_state *llseek_cached_state;
+ /* Task that allocated this structure. */
+ struct task_struct *owner_task;
};
static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info)
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index acf1f39e45d0..968dae953948 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -213,6 +213,8 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
&fs_info->defrag_inodes, rb_node)
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
+ fs_info->defrag_inodes = RB_ROOT;
+
spin_unlock(&fs_info->defrag_inodes_lock);
}
@@ -761,12 +763,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
* We can get a merged extent, in that case, we need to re-search
* tree to get the original em for defrag.
*
- * If @newer_than is 0 or em::generation < newer_than, we can trust
- * this em, as either we don't care about the generation, or the
- * merged extent map will be rejected anyway.
+ * This is because even if we have adjacent extents that are contiguous
+ * and compatible (same type and flags), we still want to defrag them
+ * so that we use less metadata (extent items in the extent tree and
+ * file extent items in the inode's subvolume tree).
*/
- if (em && (em->flags & EXTENT_FLAG_MERGED) &&
- newer_than && em->generation >= newer_than) {
+ if (em && (em->flags & EXTENT_FLAG_MERGED)) {
free_extent_map(em);
em = NULL;
}
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ad9ef8312e41..115b90d29b1d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -840,6 +840,8 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
* helper function to actually insert a head node into the rbtree.
* this does all the dirty work in terms of maintaining the correct
* overall modification count.
+ *
+ * Returns an error pointer in case of an error.
*/
static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
@@ -847,6 +849,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_qgroup_extent_record *qrecord,
int action, bool *qrecord_inserted_ret)
{
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
bool qrecord_inserted = false;
@@ -857,18 +860,21 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
if (qrecord) {
int ret;
- ret = btrfs_qgroup_trace_extent_nolock(trans->fs_info,
- delayed_refs, qrecord);
+ ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord);
if (ret) {
/* Clean up if insertion fails or item exists. */
- xa_release(&delayed_refs->dirty_extents, qrecord->bytenr);
+ xa_release(&delayed_refs->dirty_extents,
+ qrecord->bytenr >> fs_info->sectorsize_bits);
+ /* Caller responsible for freeing qrecord on error. */
+ if (ret < 0)
+ return ERR_PTR(ret);
kfree(qrecord);
} else {
qrecord_inserted = true;
}
}
- trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
+ trace_add_delayed_ref_head(fs_info, head_ref, action);
existing = htree_insert(&delayed_refs->href_root,
&head_ref->href_node);
@@ -890,8 +896,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
if (head_ref->is_data && head_ref->ref_mod < 0) {
delayed_refs->pending_csums += head_ref->num_bytes;
trans->delayed_ref_csum_deletions +=
- btrfs_csum_bytes_to_leaves(trans->fs_info,
- head_ref->num_bytes);
+ btrfs_csum_bytes_to_leaves(fs_info, head_ref->num_bytes);
}
delayed_refs->num_heads++;
delayed_refs->num_heads_ready++;
@@ -1000,27 +1005,36 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_head *new_head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
bool qrecord_inserted;
int action = generic_ref->action;
bool merged;
+ int ret;
node = kmem_cache_alloc(btrfs_delayed_ref_node_cachep, GFP_NOFS);
if (!node)
return -ENOMEM;
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
- if (!head_ref)
+ if (!head_ref) {
+ ret = -ENOMEM;
goto free_node;
+ }
if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
record = kzalloc(sizeof(*record), GFP_NOFS);
- if (!record)
+ if (!record) {
+ ret = -ENOMEM;
goto free_head_ref;
+ }
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
- generic_ref->bytenr, GFP_NOFS))
+ generic_ref->bytenr >> fs_info->sectorsize_bits,
+ GFP_NOFS)) {
+ ret = -ENOMEM;
goto free_record;
+ }
}
init_delayed_ref_common(fs_info, node, generic_ref);
@@ -1034,8 +1048,14 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
* insert both the head node and the new ref without dropping
* the spin lock
*/
- head_ref = add_delayed_ref_head(trans, head_ref, record,
- action, &qrecord_inserted);
+ new_head_ref = add_delayed_ref_head(trans, head_ref, record,
+ action, &qrecord_inserted);
+ if (IS_ERR(new_head_ref)) {
+ spin_unlock(&delayed_refs->lock);
+ ret = PTR_ERR(new_head_ref);
+ goto free_record;
+ }
+ head_ref = new_head_ref;
merged = insert_delayed_ref(trans, head_ref, node);
spin_unlock(&delayed_refs->lock);
@@ -1063,7 +1083,7 @@ free_head_ref:
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
free_node:
kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
- return -ENOMEM;
+ return ret;
}
/*
@@ -1094,6 +1114,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_ref_head *head_ref;
+ struct btrfs_delayed_ref_head *head_ref_ret;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_ref generic_ref = {
.type = BTRFS_REF_METADATA,
@@ -1113,11 +1134,15 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
- add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD,
- NULL);
-
+ head_ref_ret = add_delayed_ref_head(trans, head_ref, NULL,
+ BTRFS_UPDATE_DELAYED_HEAD, NULL);
spin_unlock(&delayed_refs->lock);
+ if (IS_ERR(head_ref_ret)) {
+ kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+ return PTR_ERR(head_ref_ret);
+ }
+
/*
* Need to update the delayed_refs_rsv with any changes we may have
* made.
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 085f30968aba..352921e76c74 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -202,7 +202,15 @@ struct btrfs_delayed_ref_root {
/* head ref rbtree */
struct rb_root_cached href_root;
- /* Track dirty extent records. */
+ /*
+ * Track dirty extent records.
+ * The keys correspond to the logical address of the extent ("bytenr")
+ * right shifted by fs_info->sectorsize_bits. This is both to get a more
+ * dense index space (optimizes xarray structure) and because indexes in
+ * xarrays are of "unsigned long" type, meaning they are 32 bits wide on
+ * 32 bits platforms, limiting the extent range to 4G which is too low
+ * and makes it unusable (truncated index values) on 32 bits platforms.
+ */
struct xarray dirty_extents;
/* this spin lock protects the rbtree and the entries inside */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 001c0c2f872c..1e8cd7c9472e 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -347,8 +347,8 @@ btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
return di;
}
/* Adjust return code if the key was not found in the next leaf. */
- if (ret > 0)
- ret = 0;
+ if (ret >= 0)
+ ret = -ENOENT;
return ERR_PTR(ret);
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 25d768e67e37..b11bfe68dd65 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -17,7 +17,7 @@
#include <linux/error-injection.h>
#include <linux/crc32c.h>
#include <linux/sched/mm.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <crypto/hash.h>
#include "ctree.h"
#include "disk-io.h"
@@ -1959,7 +1959,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -4256,6 +4256,17 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_cleanup_defrag_inodes(fs_info);
/*
+ * Wait for any fixup workers to complete.
+ * If we don't wait for them here and they are still running by the time
+ * we call kthread_stop() against the cleaner kthread further below, we
+ * get an use-after-free on the cleaner because the fixup worker adds an
+ * inode to the list of delayed iputs and then attempts to wakeup the
+ * cleaner kthread, which was already stopped and destroyed. We parked
+ * already the cleaner, but below we run all pending delayed iputs.
+ */
+ btrfs_flush_workqueue(fs_info->fixup_workers);
+
+ /*
* After we parked the cleaner kthread, ordered extents may have
* completed and created new delayed iputs. If one of the async reclaim
* tasks is running and in the RUN_DELAYED_IPUTS flush state, then we
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a5966324607d..d9f511babd89 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1300,13 +1300,29 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
bytes_left = end - start;
}
- if (bytes_left) {
+ while (bytes_left) {
+ u64 bytes_to_discard = min(BTRFS_MAX_DISCARD_CHUNK_SIZE, bytes_left);
+
ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
- bytes_left >> SECTOR_SHIFT,
+ bytes_to_discard >> SECTOR_SHIFT,
GFP_NOFS);
- if (!ret)
- *discarded_bytes += bytes_left;
+
+ if (ret) {
+ if (ret != -EOPNOTSUPP)
+ break;
+ continue;
+ }
+
+ start += bytes_to_discard;
+ bytes_left -= bytes_to_discard;
+ *discarded_bytes += bytes_to_discard;
+
+ if (btrfs_trim_interrupted()) {
+ ret = -ERESTARTSYS;
+ break;
+ }
}
+
return ret;
}
@@ -6459,7 +6475,7 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
start += len;
*trimmed += bytes;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
ret = -ERESTARTSYS;
break;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 39c9677c47d5..872cca54cc6c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -262,22 +262,23 @@ static noinline int lock_delalloc_folios(struct inode *inode,
for (i = 0; i < found_folios; i++) {
struct folio *folio = fbatch.folios[i];
- u32 len = end + 1 - start;
+ u64 range_start;
+ u32 range_len;
if (folio == locked_folio)
continue;
- if (btrfs_folio_start_writer_lock(fs_info, folio, start,
- len))
- goto out;
-
+ folio_lock(folio);
if (!folio_test_dirty(folio) || folio->mapping != mapping) {
- btrfs_folio_end_writer_lock(fs_info, folio, start,
- len);
+ folio_unlock(folio);
goto out;
}
+ range_start = max_t(u64, folio_pos(folio), start);
+ range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
+ end + 1) - range_start;
+ btrfs_folio_set_writer_lock(fs_info, folio, range_start, range_len);
- processed_end = folio_pos(folio) + folio_size(folio) - 1;
+ processed_end = range_start + range_len - 1;
}
folio_batch_release(&fbatch);
cond_resched();
@@ -1306,6 +1307,13 @@ static int submit_one_sector(struct btrfs_inode *inode,
free_extent_map(em);
em = NULL;
+ /*
+ * Although the PageDirty bit is cleared before entering this
+ * function, subpage dirty bit is not cleared.
+ * So clear subpage dirty bit here so next time we won't submit
+ * a folio for a range already written to disk.
+ */
+ btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize);
btrfs_set_range_writeback(inode, filepos, filepos + sectorsize - 1);
/*
* Above call should set the whole folio with writeback flag, even
@@ -1315,13 +1323,6 @@ static int submit_one_sector(struct btrfs_inode *inode,
*/
ASSERT(folio_test_writeback(folio));
- /*
- * Although the PageDirty bit is cleared before entering this
- * function, subpage dirty bit is not cleared.
- * So clear subpage dirty bit here so next time we won't submit
- * folio for range already written to disk.
- */
- btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize);
submit_extent_folio(bio_ctrl, disk_bytenr, folio,
sectorsize, filepos - folio_pos(folio));
return 0;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 25d191f1ac10..1d93e1202c33 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -230,7 +230,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
if (extent_map_end(prev) != next->start)
return false;
- if (prev->flags != next->flags)
+ /*
+ * The merged flag is not an on-disk flag, it just indicates we had the
+ * extent maps of 2 (or more) adjacent extents merged, so factor it out.
+ */
+ if ((prev->flags & ~EXTENT_FLAG_MERGED) !=
+ (next->flags & ~EXTENT_FLAG_MERGED))
return false;
if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
@@ -243,13 +248,19 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
/*
* Handle the on-disk data extents merge for @prev and @next.
*
+ * @prev: left extent to merge
+ * @next: right extent to merge
+ * @merged: the extent we will not discard after the merge; updated with new values
+ *
+ * After this, one of the two extents is the new merged extent and the other is
+ * removed from the tree and likely freed. Note that @merged is one of @prev/@next
+ * so there is const/non-const aliasing occurring here.
+ *
* Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
* For now only uncompressed regular extent can be merged.
- *
- * @prev and @next will be both updated to point to the new merged range.
- * Thus one of them should be removed by the caller.
*/
-static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
+static void merge_ondisk_extents(const struct extent_map *prev, const struct extent_map *next,
+ struct extent_map *merged)
{
u64 new_disk_bytenr;
u64 new_disk_num_bytes;
@@ -284,15 +295,10 @@ static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *nex
new_disk_bytenr;
new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
- prev->disk_bytenr = new_disk_bytenr;
- prev->disk_num_bytes = new_disk_num_bytes;
- prev->ram_bytes = new_disk_num_bytes;
- prev->offset = new_offset;
-
- next->disk_bytenr = new_disk_bytenr;
- next->disk_num_bytes = new_disk_num_bytes;
- next->ram_bytes = new_disk_num_bytes;
- next->offset = new_offset;
+ merged->disk_bytenr = new_disk_bytenr;
+ merged->disk_num_bytes = new_disk_num_bytes;
+ merged->ram_bytes = new_disk_num_bytes;
+ merged->offset = new_offset;
}
static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix,
@@ -361,7 +367,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
em->generation = max(em->generation, merge->generation);
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
- merge_ondisk_extents(merge, em);
+ merge_ondisk_extents(merge, em, em);
em->flags |= EXTENT_FLAG_MERGED;
validate_extent_map(fs_info, em);
@@ -378,7 +384,7 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
em->len += merge->len;
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
- merge_ondisk_extents(em, merge);
+ merge_ondisk_extents(em, merge, em);
validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c5e36f58eb07..4fb521d91b06 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3485,7 +3485,7 @@ static bool find_desired_extent_in_hole(struct btrfs_inode *inode, int whence,
static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
{
struct btrfs_inode *inode = BTRFS_I(file->f_mapping->host);
- struct btrfs_file_private *private = file->private_data;
+ struct btrfs_file_private *private;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_state *cached_state = NULL;
struct extent_state **delalloc_cached_state;
@@ -3513,7 +3513,19 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
inode_get_bytes(&inode->vfs_inode) == i_size)
return i_size;
- if (!private) {
+ spin_lock(&inode->lock);
+ private = file->private_data;
+ spin_unlock(&inode->lock);
+
+ if (private && private->owner_task != current) {
+ /*
+ * Not allocated by us, don't use it as its cached state is used
+ * by the task that allocated it and we don't want neither to
+ * mess with it nor get incorrect results because it reflects an
+ * invalid state for the current task.
+ */
+ private = NULL;
+ } else if (!private) {
private = kzalloc(sizeof(*private), GFP_KERNEL);
/*
* No worries if memory allocation failed.
@@ -3521,7 +3533,23 @@ static loff_t find_desired_extent(struct file *file, loff_t offset, int whence)
* lseek SEEK_HOLE/DATA calls to a file when there's delalloc,
* so everything will still be correct.
*/
- file->private_data = private;
+ if (private) {
+ bool free = false;
+
+ private->owner_task = current;
+
+ spin_lock(&inode->lock);
+ if (file->private_data)
+ free = true;
+ else
+ file->private_data = private;
+ spin_unlock(&inode->lock);
+
+ if (free) {
+ kfree(private);
+ private = NULL;
+ }
+ }
}
if (private)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index eaa1dbd31352..f4bcb2530660 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3809,7 +3809,7 @@ next:
if (async && *total_trimmed)
break;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
ret = -ERESTARTSYS;
break;
}
@@ -4000,7 +4000,7 @@ next:
}
block_group->discard_cursor = start;
- if (fatal_signal_pending(current)) {
+ if (btrfs_trim_interrupted()) {
if (start != offset)
reset_trimming_bitmap(ctl, offset);
ret = -ERESTARTSYS;
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 83774bfd7b3b..9f1dbfdee8ca 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -10,6 +10,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
+#include <linux/freezer.h>
#include "fs.h"
struct inode;
@@ -56,6 +57,11 @@ static inline bool btrfs_free_space_trimming_bitmap(
return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
}
+static inline bool btrfs_trim_interrupted(void)
+{
+ return fatal_signal_pending(current) || freezing(current);
+}
+
/*
* Deltas are an effective way to populate global statistics. Give macro names
* to make it clear what we're doing. An example is discard_extents in
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index edac499fd83d..da51edbad6a0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,7 +32,7 @@
#include <linux/migrate.h>
#include <linux/sched/mm.h>
#include <linux/iomap.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/fsverity.h>
#include "misc.h"
#include "ctree.h"
@@ -3111,6 +3111,11 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
ret = btrfs_update_inode_fallback(trans, inode);
if (ret) /* -ENOMEM or corruption */
btrfs_abort_transaction(trans, ret);
+
+ ret = btrfs_insert_raid_extent(trans, ordered_extent);
+ if (ret)
+ btrfs_abort_transaction(trans, ret);
+
goto out;
}
@@ -4363,11 +4368,8 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
*/
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino, &fname.disk_name);
- if (IS_ERR_OR_NULL(di)) {
- if (!di)
- ret = -ENOENT;
- else
- ret = PTR_ERR(di);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
btrfs_abort_transaction(trans, ret);
goto out;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8537eb9b5531..226c91fe31a7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1310,12 +1310,12 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
} else {
struct fd src = fdget(fd);
struct inode *src_inode;
- if (!src.file) {
+ if (!fd_file(src)) {
ret = -EINVAL;
goto out_drop_write;
}
- src_inode = file_inode(src.file);
+ src_inode = file_inode(fd_file(src));
if (src_inode->i_sb != file_inode(file)->i_sb) {
btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 77752eec125d..363fd28c0268 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -239,7 +239,8 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
vaf.fmt = fmt;
vaf.va = &args;
- if (__ratelimit(ratelimit)) {
+ /* Do not ratelimit if CONFIG_BTRFS_DEBUG is enabled. */
+ if (IS_ENABLED(CONFIG_BTRFS_DEBUG) || __ratelimit(ratelimit)) {
if (fs_info) {
char statestr[STATE_STRING_BUF_LEN];
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c297909f1506..a0e8deca87a7 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1407,7 +1407,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
- fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
+ fs_info->qgroup_drop_subtree_thres = BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
@@ -2005,16 +2005,26 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
struct btrfs_qgroup_extent_record *record)
{
struct btrfs_qgroup_extent_record *existing, *ret;
- unsigned long bytenr = record->bytenr;
+ const unsigned long index = (record->bytenr >> fs_info->sectorsize_bits);
if (!btrfs_qgroup_full_accounting(fs_info))
return 1;
+#if BITS_PER_LONG == 32
+ if (record->bytenr >= MAX_LFS_FILESIZE) {
+ btrfs_err_rl(fs_info,
+"qgroup record for extent at %llu is beyond 32bit page cache and xarray index limit",
+ record->bytenr);
+ btrfs_err_32bit_limit(fs_info);
+ return -EOVERFLOW;
+ }
+#endif
+
lockdep_assert_held(&delayed_refs->lock);
trace_btrfs_qgroup_trace_extent(fs_info, record);
xa_lock(&delayed_refs->dirty_extents);
- existing = xa_load(&delayed_refs->dirty_extents, bytenr);
+ existing = xa_load(&delayed_refs->dirty_extents, index);
if (existing) {
if (record->data_rsv && !existing->data_rsv) {
existing->data_rsv = record->data_rsv;
@@ -2024,7 +2034,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
return 1;
}
- ret = __xa_store(&delayed_refs->dirty_extents, record->bytenr, record, GFP_ATOMIC);
+ ret = __xa_store(&delayed_refs->dirty_extents, index, record, GFP_ATOMIC);
xa_unlock(&delayed_refs->dirty_extents);
if (xa_is_err(ret)) {
qgroup_mark_inconsistent(fs_info);
@@ -2129,6 +2139,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_qgroup_extent_record *record;
struct btrfs_delayed_ref_root *delayed_refs;
+ const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
int ret;
if (!btrfs_qgroup_full_accounting(fs_info) || bytenr == 0 || num_bytes == 0)
@@ -2137,7 +2148,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
if (!record)
return -ENOMEM;
- if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, bytenr, GFP_NOFS)) {
+ if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, index, GFP_NOFS)) {
kfree(record);
return -ENOMEM;
}
@@ -2152,7 +2163,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
spin_unlock(&delayed_refs->lock);
if (ret) {
/* Clean up if insertion fails or item exists. */
- xa_release(&delayed_refs->dirty_extents, record->bytenr);
+ xa_release(&delayed_refs->dirty_extents, index);
kfree(record);
return 0;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 98adf4ec7b01..c229256d6fd5 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -121,6 +121,8 @@ struct btrfs_inode;
#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1ULL << 63)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1ULL << 62)
+#define BTRFS_QGROUP_DROP_SUBTREE_THRES_DEFAULT (3)
+
/*
* Record a dirty extent, and info qgroup to update quota on it
*/
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index ea4ed85919ec..f3834f8d26b4 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -232,70 +232,6 @@ static struct btrfs_backref_node *walk_down_backref(
return NULL;
}
-static void update_backref_node(struct btrfs_backref_cache *cache,
- struct btrfs_backref_node *node, u64 bytenr)
-{
- struct rb_node *rb_node;
- rb_erase(&node->rb_node, &cache->rb_root);
- node->bytenr = bytenr;
- rb_node = rb_simple_insert(&cache->rb_root, node->bytenr, &node->rb_node);
- if (rb_node)
- btrfs_backref_panic(cache->fs_info, bytenr, -EEXIST);
-}
-
-/*
- * update backref cache after a transaction commit
- */
-static int update_backref_cache(struct btrfs_trans_handle *trans,
- struct btrfs_backref_cache *cache)
-{
- struct btrfs_backref_node *node;
- int level = 0;
-
- if (cache->last_trans == 0) {
- cache->last_trans = trans->transid;
- return 0;
- }
-
- if (cache->last_trans == trans->transid)
- return 0;
-
- /*
- * detached nodes are used to avoid unnecessary backref
- * lookup. transaction commit changes the extent tree.
- * so the detached nodes are no longer useful.
- */
- while (!list_empty(&cache->detached)) {
- node = list_entry(cache->detached.next,
- struct btrfs_backref_node, list);
- btrfs_backref_cleanup_node(cache, node);
- }
-
- while (!list_empty(&cache->changed)) {
- node = list_entry(cache->changed.next,
- struct btrfs_backref_node, list);
- list_del_init(&node->list);
- BUG_ON(node->pending);
- update_backref_node(cache, node, node->new_bytenr);
- }
-
- /*
- * some nodes can be left in the pending list if there were
- * errors during processing the pending nodes.
- */
- for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
- list_for_each_entry(node, &cache->pending[level], list) {
- BUG_ON(!node->pending);
- if (node->bytenr == node->new_bytenr)
- continue;
- update_backref_node(cache, node, node->new_bytenr);
- }
- }
-
- cache->last_trans = 0;
- return 1;
-}
-
static bool reloc_root_is_dead(const struct btrfs_root *root)
{
/*
@@ -551,9 +487,6 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
struct btrfs_backref_edge *new_edge;
struct rb_node *rb_node;
- if (cache->last_trans > 0)
- update_backref_cache(trans, cache);
-
rb_node = rb_simple_search(&cache->rb_root, src->commit_root->start);
if (rb_node) {
node = rb_entry(rb_node, struct btrfs_backref_node, rb_node);
@@ -923,7 +856,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
btrfs_grab_root(reloc_root);
/* root->reloc_root will stay until current relocation finished */
- if (fs_info->reloc_ctl->merge_reloc_tree &&
+ if (fs_info->reloc_ctl && fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
/*
@@ -3698,11 +3631,9 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
break;
}
restart:
- if (update_backref_cache(trans, &rc->backref_cache)) {
- btrfs_end_transaction(trans);
- trans = NULL;
- continue;
- }
+ if (rc->backref_cache.last_trans != trans->transid)
+ btrfs_backref_release_cache(&rc->backref_cache);
+ rc->backref_cache.last_trans = trans->transid;
ret = find_next_extent(rc, path, &key);
if (ret < 0)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 7f48ba6c1c77..b068469871f8 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -346,8 +346,10 @@ struct name_cache_entry {
u64 parent_gen;
int ret;
int need_later_update;
+ /* Name length without NUL terminator. */
int name_len;
- char name[] __counted_by(name_len);
+ /* Not NUL terminated. */
+ char name[] __counted_by(name_len) __nonstring;
};
/* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */
@@ -2388,7 +2390,7 @@ out_cache:
/*
* Store the result of the lookup in the name cache.
*/
- nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
+ nce = kmalloc(sizeof(*nce) + fs_path_len(dest), GFP_KERNEL);
if (!nce) {
ret = -ENOMEM;
goto out;
@@ -2400,7 +2402,7 @@ out_cache:
nce->parent_gen = *parent_gen;
nce->name_len = fs_path_len(dest);
nce->ret = ret;
- strcpy(nce->name, dest->start);
+ memcpy(nce->name, dest->start, nce->name_len);
if (ino < sctx->send_progress)
nce->need_later_update = 0;
@@ -6187,8 +6189,29 @@ static int send_write_or_clone(struct send_ctx *sctx,
if (ret < 0)
return ret;
- if (clone_root->offset + num_bytes == info.size)
+ if (clone_root->offset + num_bytes == info.size) {
+ /*
+ * The final size of our file matches the end offset, but it may
+ * be that its current size is larger, so we have to truncate it
+ * to any value between the start offset of the range and the
+ * final i_size, otherwise the clone operation is invalid
+ * because it's unaligned and it ends before the current EOF.
+ * We do this truncate to the final i_size when we finish
+ * processing the inode, but it's too late by then. And here we
+ * truncate to the start offset of the range because it's always
+ * sector size aligned while if it were the final i_size it
+ * would result in dirtying part of a page, filling part of a
+ * page with zeroes and then having the clone operation at the
+ * receiver trigger IO and wait for it due to the dirty page.
+ */
+ if (sctx->parent_root != NULL) {
+ ret = send_truncate(sctx, sctx->cur_ino,
+ sctx->cur_inode_gen, offset);
+ if (ret < 0)
+ return ret;
+ }
goto clone_data;
+ }
write_data:
ret = send_extent_data(sctx, path, offset, num_bytes);
@@ -7167,13 +7190,11 @@ static int changed_extent(struct send_ctx *sctx,
static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
{
- int ret = 0;
-
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
sctx->cur_inode_needs_verity = true;
}
- return ret;
+ return 0;
}
static int dir_changed(struct send_ctx *sctx, u64 dir)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 98fa0f382480..926d7a9ed99d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -340,6 +340,15 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
fallthrough;
case Opt_compress:
case Opt_compress_type:
+ /*
+ * Provide the same semantics as older kernels that don't use fs
+ * context, specifying the "compress" option clears
+ * "force-compress" without the need to pass
+ * "compress-force=[no|none]" before specifying "compress".
+ */
+ if (opt != Opt_compress_force && opt != Opt_compress_force_type)
+ btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS);
+
if (opt == Opt_compress || opt == Opt_compress_force) {
ctx->compress_type = BTRFS_COMPRESS_ZLIB;
ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL;
@@ -1498,8 +1507,7 @@ static int btrfs_reconfigure(struct fs_context *fc)
sync_filesystem(sb);
set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
- if (!mount_reconfigure &&
- !btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags))
+ if (!btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags))
return -EINVAL;
ret = btrfs_check_features(fs_info, !(fc->sb_flags & SB_RDONLY));
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 634d69964fe4..7b50263723bc 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1517,7 +1517,7 @@ static int check_extent_item(struct extent_buffer *leaf,
dref_objectid > BTRFS_LAST_FREE_OBJECTID)) {
extent_err(leaf, slot,
"invalid data ref objectid value %llu",
- dref_root);
+ dref_objectid);
return -EUCLEAN;
}
if (unlikely(!IS_ALIGNED(dref_offset,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e2ed2a791f8f..9637c7cdc0cf 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1374,7 +1374,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
struct inode *inode = NULL;
unsigned long ref_ptr;
unsigned long ref_end;
- struct fscrypt_str name;
+ struct fscrypt_str name = { 0 };
int ret;
int log_ref_ver = 0;
u64 parent_objectid;
@@ -1845,7 +1845,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
struct btrfs_dir_item *di,
struct btrfs_key *key)
{
- struct fscrypt_str name;
+ struct fscrypt_str name = { 0 };
struct btrfs_dir_item *dir_dst_di;
struct btrfs_dir_item *index_dst_di;
bool dir_dst_matches = false;
@@ -2125,7 +2125,7 @@ static noinline int check_item_in_log(struct btrfs_trans_handle *trans,
struct extent_buffer *eb;
int slot;
struct btrfs_dir_item *di;
- struct fscrypt_str name;
+ struct fscrypt_str name = { 0 };
struct inode *inode = NULL;
struct btrfs_key location;
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index c6399513c66f..aca2861f2187 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -5,7 +5,7 @@
#include <linux/kthread.h>
#include <linux/uuid.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include "messages.h"
#include "ctree.h"
#include "transaction.h"
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8f340ad1d938..eb51b609190f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1105,6 +1105,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (device->bdev) {
fs_devices->open_devices--;
device->bdev = NULL;
+ device->bdev_file = NULL;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
btrfs_destroy_dev_zone_info(device);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 03d2d60afe0c..4481575dd70f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -30,6 +30,12 @@ struct btrfs_zoned_device_info;
#define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
+/*
+ * Arbitratry maximum size of one discard request to limit potentially long time
+ * spent in blkdev_issue_discard().
+ */
+#define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G)
+
extern struct mutex uuid_mutex;
#define BTRFS_STRIPE_LEN SZ_64K
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 7fa2920632ba..69d03feea4e0 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1340,7 +1340,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
switch (zone.cond) {
case BLK_ZONE_COND_OFFLINE:
case BLK_ZONE_COND_READONLY:
- btrfs_err(fs_info,
+ btrfs_err_in_rcu(fs_info,
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
(info->physical >> device->zone_info->zone_size_shift),
rcu_str_deref(device->name), device->devid);