summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/bio.c2
-rw-r--r--fs/btrfs/block-group.c86
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/extent_map.c7
-rw-r--r--fs/btrfs/free-space-cache.c8
-rw-r--r--fs/btrfs/fs.h7
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/ioctl.c1
-rw-r--r--fs/btrfs/space-info.c42
-rw-r--r--fs/btrfs/space-info.h2
-rw-r--r--fs/btrfs/sysfs.c42
-rw-r--r--fs/btrfs/volumes.c3
-rw-r--r--fs/btrfs/zoned.c45
-rw-r--r--fs/cifs/cifs_debug.c5
-rw-r--r--fs/cifs/cifs_dfs_ref.c1
-rw-r--r--fs/cifs/cifs_fs_sb.h2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/connect.c10
-rw-r--r--fs/cifs/dfs.c67
-rw-r--r--fs/cifs/dfs.h19
-rw-r--r--fs/cifs/dfs_cache.c140
-rw-r--r--fs/cifs/dfs_cache.h2
-rw-r--r--fs/cifs/fs_context.h1
-rw-r--r--fs/cifs/misc.c8
-rw-r--r--fs/cifs/smb2inode.c31
-rw-r--r--fs/cifs/smb2transport.c2
-rw-r--r--fs/cifs/transport.c21
-rw-r--r--fs/crypto/keyring.c23
-rw-r--r--fs/erofs/data.c2
-rw-r--r--fs/erofs/decompressor_lzma.c4
-rw-r--r--fs/erofs/internal.h4
-rw-r--r--fs/erofs/pcpubuf.c2
-rw-r--r--fs/erofs/zdata.c12
-rw-r--r--fs/erofs/zmap.c3
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/fsmap.c2
-rw-r--r--fs/ext4/inline.c1
-rw-r--r--fs/ext4/inode.c7
-rw-r--r--fs/ext4/ioctl.c1
-rw-r--r--fs/ext4/namei.c43
-rw-r--r--fs/ext4/page-io.c11
-rw-r--r--fs/ext4/super.c38
-rw-r--r--fs/ext4/sysfs.c4
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/file.c1
-rw-r--r--fs/gfs2/dentry.c18
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/lockd/clnt4xdr.c9
-rw-r--r--fs/lockd/xdr4.c13
-rw-r--r--fs/locks.c4
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfsd/Kconfig2
-rw-r--r--fs/nfsd/vfs.c11
-rw-r--r--fs/ocfs2/aops.c19
-rw-r--r--fs/splice.c1
-rw-r--r--fs/super.c15
-rw-r--r--fs/udf/inode.c14
-rw-r--r--fs/verity/enable.c25
-rw-r--r--fs/verity/verify.c12
-rw-r--r--fs/xfs/xfs_aops.c21
-rw-r--r--fs/xfs/xfs_qm.c40
-rw-r--r--fs/zonefs/file.c4
64 files changed, 536 insertions, 418 deletions
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index d8b90f95b157..726592868e9c 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -287,7 +287,7 @@ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
- if (!(bio->bi_opf & REQ_RAHEAD))
+ else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5b10401d803b..5fc670c27f86 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -558,14 +558,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
static int sample_block_group_extent_item(struct btrfs_caching_control *caching_ctl,
struct btrfs_block_group *block_group,
int index, int max_index,
- struct btrfs_key *key)
+ struct btrfs_key *found_key)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_root *extent_root;
- int ret = 0;
u64 search_offset;
u64 search_end = block_group->start + block_group->length;
struct btrfs_path *path;
+ struct btrfs_key search_key;
+ int ret = 0;
ASSERT(index >= 0);
ASSERT(index <= max_index);
@@ -585,37 +586,24 @@ static int sample_block_group_extent_item(struct btrfs_caching_control *caching_
path->reada = READA_FORWARD;
search_offset = index * div_u64(block_group->length, max_index);
- key->objectid = block_group->start + search_offset;
- key->type = BTRFS_EXTENT_ITEM_KEY;
- key->offset = 0;
+ search_key.objectid = block_group->start + search_offset;
+ search_key.type = BTRFS_EXTENT_ITEM_KEY;
+ search_key.offset = 0;
- while (1) {
- ret = btrfs_search_forward(extent_root, key, path, 0);
- if (ret != 0)
- goto out;
+ btrfs_for_each_slot(extent_root, &search_key, found_key, path, ret) {
/* Success; sampled an extent item in the block group */
- if (key->type == BTRFS_EXTENT_ITEM_KEY &&
- key->objectid >= block_group->start &&
- key->objectid + key->offset <= search_end)
- goto out;
+ if (found_key->type == BTRFS_EXTENT_ITEM_KEY &&
+ found_key->objectid >= block_group->start &&
+ found_key->objectid + found_key->offset <= search_end)
+ break;
/* We can't possibly find a valid extent item anymore */
- if (key->objectid >= search_end) {
+ if (found_key->objectid >= search_end) {
ret = 1;
break;
}
- if (key->type < BTRFS_EXTENT_ITEM_KEY)
- key->type = BTRFS_EXTENT_ITEM_KEY;
- else
- key->objectid++;
- btrfs_release_path(path);
- up_read(&fs_info->commit_root_sem);
- mutex_unlock(&caching_ctl->mutex);
- cond_resched();
- mutex_lock(&caching_ctl->mutex);
- down_read(&fs_info->commit_root_sem);
}
-out:
+
lockdep_assert_held(&caching_ctl->mutex);
lockdep_assert_held_read(&fs_info->commit_root_sem);
btrfs_free_path(path);
@@ -659,6 +647,7 @@ out:
static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl,
struct btrfs_block_group *block_group)
{
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
struct btrfs_key key;
int i;
u64 min_size = block_group->length;
@@ -668,6 +657,8 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl
if (!btrfs_block_group_should_use_size_class(block_group))
return 0;
+ lockdep_assert_held(&caching_ctl->mutex);
+ lockdep_assert_held_read(&fs_info->commit_root_sem);
for (i = 0; i < 5; ++i) {
ret = sample_block_group_extent_item(caching_ctl, block_group, i, 5, &key);
if (ret < 0)
@@ -682,7 +673,6 @@ static int load_block_group_size_class(struct btrfs_caching_control *caching_ctl
block_group->size_class = size_class;
spin_unlock(&block_group->lock);
}
-
out:
return ret;
}
@@ -1185,14 +1175,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
< block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total
< block_group->length * factor);
- WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
- &block_group->runtime_flags) &&
- block_group->space_info->active_total_bytes
- < block_group->length);
}
block_group->space_info->total_bytes -= block_group->length;
- if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
- block_group->space_info->active_total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
block_group->space_info->bytes_zone_unusable -=
@@ -1836,7 +1820,8 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
btrfs_info(fs_info,
"reclaiming chunk %llu with %llu%% used %llu%% unusable",
- bg->start, div_u64(bg->used * 100, bg->length),
+ bg->start,
+ div64_u64(bg->used * 100, bg->length),
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
@@ -2493,18 +2478,29 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
struct btrfs_block_group_item bgi;
struct btrfs_root *root = btrfs_block_group_root(fs_info);
struct btrfs_key key;
+ u64 old_commit_used;
+ int ret;
spin_lock(&block_group->lock);
btrfs_set_stack_block_group_used(&bgi, block_group->used);
btrfs_set_stack_block_group_chunk_objectid(&bgi,
block_group->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
+ old_commit_used = block_group->commit_used;
+ block_group->commit_used = block_group->used;
key.objectid = block_group->start;
key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
key.offset = block_group->length;
spin_unlock(&block_group->lock);
- return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
+ ret = btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
+ if (ret < 0) {
+ spin_lock(&block_group->lock);
+ block_group->commit_used = old_commit_used;
+ spin_unlock(&block_group->lock);
+ }
+
+ return ret;
}
static int insert_dev_extent(struct btrfs_trans_handle *trans,
@@ -3474,6 +3470,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&info->delalloc_root_lock);
while (total) {
+ struct btrfs_space_info *space_info;
bool reclaim = false;
cache = btrfs_lookup_block_group(info, bytenr);
@@ -3481,6 +3478,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
ret = -ENOENT;
break;
}
+ space_info = cache->space_info;
factor = btrfs_bg_type_to_factor(cache->flags);
/*
@@ -3495,7 +3493,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
byte_in_group = bytenr - cache->start;
WARN_ON(byte_in_group > cache->length);
- spin_lock(&cache->space_info->lock);
+ spin_lock(&space_info->lock);
spin_lock(&cache->lock);
if (btrfs_test_opt(info, SPACE_CACHE) &&
@@ -3508,24 +3506,24 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
old_val += num_bytes;
cache->used = old_val;
cache->reserved -= num_bytes;
- cache->space_info->bytes_reserved -= num_bytes;
- cache->space_info->bytes_used += num_bytes;
- cache->space_info->disk_used += num_bytes * factor;
+ space_info->bytes_reserved -= num_bytes;
+ space_info->bytes_used += num_bytes;
+ space_info->disk_used += num_bytes * factor;
spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ spin_unlock(&space_info->lock);
} else {
old_val -= num_bytes;
cache->used = old_val;
cache->pinned += num_bytes;
- btrfs_space_info_update_bytes_pinned(info,
- cache->space_info, num_bytes);
- cache->space_info->bytes_used -= num_bytes;
- cache->space_info->disk_used -= num_bytes * factor;
+ btrfs_space_info_update_bytes_pinned(info, space_info,
+ num_bytes);
+ space_info->bytes_used -= num_bytes;
+ space_info->disk_used -= num_bytes * factor;
reclaim = should_reclaim_block_group(cache, num_bytes);
spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ spin_unlock(&space_info->lock);
set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 0095c6e4c3d1..6b457b010cbc 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1048,7 +1048,7 @@ again:
* so there is only one iref. The case that several irefs are
* in the same item doesn't exist.
*/
- btrfs_del_item(trans, root, path);
+ ret = btrfs_del_item(trans, root, path);
out:
btrfs_release_delayed_iref(node);
btrfs_release_path(path);
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index be94030e1dfb..138afa955370 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -763,7 +763,13 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
goto next;
}
+ flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+ /*
+ * In case we split the extent map, we want to preserve the
+ * EXTENT_FLAG_LOGGING flag on our extent map, but we don't want
+ * it on the new extent maps.
+ */
clear_bit(EXTENT_FLAG_LOGGING, &flags);
modified = !list_empty(&em->list);
@@ -774,7 +780,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
if (em->start >= start && em_end <= end)
goto remove_em;
- flags = em->flags;
gen = em->generation;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0d250d052487..d84cef89cdff 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2693,8 +2693,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
spin_lock(&ctl->tree_lock);
+ /* Count initial region as zone_unusable until it gets activated. */
if (!used)
to_free = size;
+ else if (initial &&
+ test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
+ (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+ to_free = 0;
else if (initial)
to_free = block_group->zone_capacity;
else if (offset >= block_group->alloc_offset)
@@ -2722,7 +2727,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
reclaimable_unusable = block_group->zone_unusable -
(block_group->length - block_group->zone_capacity);
/* All the region is now unusable. Mark it as unused and reclaim */
- if (block_group->zone_unusable == block_group->length) {
+ if (block_group->zone_unusable == block_group->length &&
+ block_group->alloc_offset) {
btrfs_mark_bg_unused(block_group);
} else if (bg_reclaim_threshold &&
reclaimable_unusable >=
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 4c477eae6891..24cd49229408 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -120,11 +120,8 @@ enum {
/* Indicate that we want to commit the transaction. */
BTRFS_FS_NEED_TRANS_COMMIT,
- /*
- * Indicate metadata over-commit is disabled. This is set when active
- * zone tracking is needed.
- */
- BTRFS_FS_NO_OVERCOMMIT,
+ /* This is set when active zone tracking is needed. */
+ BTRFS_FS_ACTIVE_ZONE_TRACKING,
/*
* Indicate if we have some features changed, this is mostly for
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6c18dc9a1831..957e4d76a7b6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5421,8 +5421,13 @@ static int btrfs_inode_by_name(struct btrfs_inode *dir, struct dentry *dentry,
return -ENOMEM;
ret = fscrypt_setup_filename(&dir->vfs_inode, &dentry->d_name, 1, &fname);
- if (ret)
+ if (ret < 0)
goto out;
+ /*
+ * fscrypt_setup_filename() should never return a positive value, but
+ * gcc on sparc/parisc thinks it can, so assert that doesn't happen.
+ */
+ ASSERT(ret == 0);
/* This needs to handle no-key deletions later on */
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 84626c8ad5bf..a0ef1a1784c7 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2859,6 +2859,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
di_args->bytes_used = btrfs_device_get_bytes_used(dev);
di_args->total_bytes = btrfs_device_get_total_bytes(dev);
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
+ memcpy(di_args->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
if (dev->name)
strscpy(di_args->path, btrfs_dev_name(dev), sizeof(di_args->path));
else
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 69c09508afb5..3eecce86f63f 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -308,8 +308,6 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
ASSERT(found);
spin_lock(&found->lock);
found->total_bytes += block_group->length;
- if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
- found->active_total_bytes += block_group->length;
found->disk_total += block_group->length * factor;
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
@@ -379,22 +377,6 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
return avail;
}
-static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
- struct btrfs_space_info *space_info)
-{
- /*
- * On regular filesystem, all total_bytes are always writable. On zoned
- * filesystem, there may be a limitation imposed by max_active_zones.
- * For metadata allocation, we cannot finish an existing active block
- * group to avoid a deadlock. Thus, we need to consider only the active
- * groups to be writable for metadata space.
- */
- if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
- return space_info->total_bytes;
-
- return space_info->active_total_bytes;
-}
-
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, u64 bytes,
enum btrfs_reserve_flush_enum flush)
@@ -407,13 +389,13 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;
used = btrfs_space_info_used(space_info, true);
- if (test_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags) &&
+ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
(space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
avail = 0;
else
avail = calc_available_free_space(fs_info, space_info, flush);
- if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
+ if (used + bytes < space_info->total_bytes + avail)
return 1;
return 0;
}
@@ -449,7 +431,7 @@ again:
ticket = list_first_entry(head, struct reserve_ticket, list);
/* Check and see if our ticket can be satisfied now. */
- if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
+ if ((used + ticket->bytes <= space_info->total_bytes) ||
btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
flush)) {
btrfs_space_info_update_bytes_may_use(fs_info,
@@ -829,7 +811,6 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
{
u64 used;
u64 avail;
- u64 total;
u64 to_reclaim = space_info->reclaim_size;
lockdep_assert_held(&space_info->lock);
@@ -844,9 +825,8 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
* space. If that's the case add in our overage so we make sure to put
* appropriate pressure on the flushing state machine.
*/
- total = writable_total_bytes(fs_info, space_info);
- if (total + avail < used)
- to_reclaim += used - (total + avail);
+ if (space_info->total_bytes + avail < used)
+ to_reclaim += used - (space_info->total_bytes + avail);
return to_reclaim;
}
@@ -856,11 +836,10 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
{
u64 global_rsv_size = fs_info->global_block_rsv.reserved;
u64 ordered, delalloc;
- u64 total = writable_total_bytes(fs_info, space_info);
u64 thresh;
u64 used;
- thresh = mult_perc(total, 90);
+ thresh = mult_perc(space_info->total_bytes, 90);
lockdep_assert_held(&space_info->lock);
@@ -923,8 +902,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
BTRFS_RESERVE_FLUSH_ALL);
used = space_info->bytes_used + space_info->bytes_reserved +
space_info->bytes_readonly + global_rsv_size;
- if (used < total)
- thresh += total - used;
+ if (used < space_info->total_bytes)
+ thresh += space_info->total_bytes - used;
thresh >>= space_info->clamp;
used = space_info->bytes_pinned;
@@ -1651,7 +1630,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
* can_overcommit() to ensure we can overcommit to continue.
*/
if (!pending_tickets &&
- ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
+ ((used + orig_bytes <= space_info->total_bytes) ||
btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
orig_bytes);
@@ -1665,8 +1644,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
*/
if (ret && unlikely(flush == BTRFS_RESERVE_FLUSH_EMERGENCY)) {
used = btrfs_space_info_used(space_info, false);
- if (used + orig_bytes <=
- writable_total_bytes(fs_info, space_info)) {
+ if (used + orig_bytes <= space_info->total_bytes) {
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
orig_bytes);
ret = 0;
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index fc99ea2b0c34..2033b71b18ce 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -96,8 +96,6 @@ struct btrfs_space_info {
u64 bytes_may_use; /* number of bytes that may be used for
delalloc/allocations */
u64 bytes_readonly; /* total bytes that are read only */
- /* Total bytes in the space, but only accounts active block groups. */
- u64 active_total_bytes;
u64 bytes_zone_unusable; /* total bytes that are unusable until
resetting the device zone */
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 8c5efa5813b3..37fc58a7f27e 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -9,6 +9,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/bug.h>
+#include <linux/list.h>
#include <crypto/hash.h>
#include "messages.h"
#include "ctree.h"
@@ -778,6 +779,45 @@ static ssize_t btrfs_chunk_size_store(struct kobject *kobj,
return len;
}
+static ssize_t btrfs_size_classes_show(struct kobject *kobj,
+ struct kobj_attribute *a, char *buf)
+{
+ struct btrfs_space_info *sinfo = to_space_info(kobj);
+ struct btrfs_block_group *bg;
+ u32 none = 0;
+ u32 small = 0;
+ u32 medium = 0;
+ u32 large = 0;
+
+ for (int i = 0; i < BTRFS_NR_RAID_TYPES; ++i) {
+ down_read(&sinfo->groups_sem);
+ list_for_each_entry(bg, &sinfo->block_groups[i], list) {
+ if (!btrfs_block_group_should_use_size_class(bg))
+ continue;
+ switch (bg->size_class) {
+ case BTRFS_BG_SZ_NONE:
+ none++;
+ break;
+ case BTRFS_BG_SZ_SMALL:
+ small++;
+ break;
+ case BTRFS_BG_SZ_MEDIUM:
+ medium++;
+ break;
+ case BTRFS_BG_SZ_LARGE:
+ large++;
+ break;
+ }
+ }
+ up_read(&sinfo->groups_sem);
+ }
+ return sysfs_emit(buf, "none %u\n"
+ "small %u\n"
+ "medium %u\n"
+ "large %u\n",
+ none, small, medium, large);
+}
+
#ifdef CONFIG_BTRFS_DEBUG
/*
* Request chunk allocation with current chunk size.
@@ -835,6 +875,7 @@ SPACE_INFO_ATTR(bytes_zone_unusable);
SPACE_INFO_ATTR(disk_used);
SPACE_INFO_ATTR(disk_total);
BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store);
+BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show);
static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
struct kobj_attribute *a,
@@ -887,6 +928,7 @@ static struct attribute *space_info_attrs[] = {
BTRFS_ATTR_PTR(space_info, disk_total),
BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
BTRFS_ATTR_PTR(space_info, chunk_size),
+ BTRFS_ATTR_PTR(space_info, size_classes),
#ifdef CONFIG_BTRFS_DEBUG
BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
#endif
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7823168c08a6..6d0124b6e79e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6363,7 +6363,8 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
ASSERT(op != BTRFS_MAP_DISCARD);
em = btrfs_get_chunk_map(fs_info, logical, *length);
- ASSERT(!IS_ERR(em));
+ if (IS_ERR(em))
+ return PTR_ERR(em);
map = em->map_lookup;
data_stripes = nr_data_stripes(map);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index f95b2c94d619..45d04092f2f8 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -524,8 +524,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
}
atomic_set(&zone_info->active_zones_left,
max_active_zones - nactive);
- /* Overcommit does not work well with active zone tacking. */
- set_bit(BTRFS_FS_NO_OVERCOMMIT, &fs_info->flags);
+ set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags);
}
/* Validate superblock log */
@@ -1581,9 +1580,19 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
return;
WARN_ON(cache->bytes_super != 0);
- unusable = (cache->alloc_offset - cache->used) +
- (cache->length - cache->zone_capacity);
- free = cache->zone_capacity - cache->alloc_offset;
+
+ /* Check for block groups never get activated */
+ if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
+ cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
+ !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
+ cache->alloc_offset == 0) {
+ unusable = cache->length;
+ free = 0;
+ } else {
+ unusable = (cache->alloc_offset - cache->used) +
+ (cache->length - cache->zone_capacity);
+ free = cache->zone_capacity - cache->alloc_offset;
+ }
/* We only need ->free_space in ALLOC_SEQ block groups */
cache->cached = BTRFS_CACHE_FINISHED;
@@ -1902,7 +1911,11 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
/* Successfully activated all the zones */
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
- space_info->active_total_bytes += block_group->length;
+ WARN_ON(block_group->alloc_offset != 0);
+ if (block_group->zone_unusable == block_group->length) {
+ block_group->zone_unusable = block_group->length - block_group->zone_capacity;
+ space_info->bytes_zone_unusable -= block_group->zone_capacity;
+ }
spin_unlock(&block_group->lock);
btrfs_try_granting_tickets(fs_info, space_info);
spin_unlock(&space_info->lock);
@@ -2086,11 +2099,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
if (!device->bdev)
continue;
- if (!zinfo->max_active_zones ||
- atomic_read(&zinfo->active_zones_left)) {
+ if (!zinfo->max_active_zones) {
ret = true;
break;
}
+
+ switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+ case 0: /* single */
+ ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+ break;
+ case BTRFS_BLOCK_GROUP_DUP:
+ ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+ break;
+ }
+ if (ret)
+ break;
}
mutex_unlock(&fs_info->chunk_mutex);
@@ -2256,7 +2279,7 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
u64 avail;
spin_lock(&block_group->lock);
- if (block_group->reserved ||
+ if (block_group->reserved || block_group->alloc_offset == 0 ||
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
spin_unlock(&block_group->lock);
continue;
@@ -2293,10 +2316,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
return 0;
- /* No more block groups to activate */
- if (space_info->active_total_bytes == space_info->total_bytes)
- return 0;
-
for (;;) {
int ret;
bool need_finish = false;
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 1911f7016fa1..19a70a69c760 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -420,6 +420,11 @@ skip_rdma:
from_kuid(&init_user_ns, ses->linux_uid),
from_kuid(&init_user_ns, ses->cred_uid));
+ if (ses->dfs_root_ses) {
+ seq_printf(m, "\n\tDFS root session id: 0x%llx",
+ ses->dfs_root_ses->Suid);
+ }
+
spin_lock(&ses->chan_lock);
if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0))
seq_puts(m, "\tPrimary channel: DISCONNECTED ");
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 2b1a8d55b4ec..cb40074feb3e 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -179,6 +179,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct path *path)
tmp.source = full_path;
tmp.leaf_fullpath = NULL;
tmp.UNC = tmp.prepath = NULL;
+ tmp.dfs_root_ses = NULL;
rc = smb3_fs_context_dup(ctx, &tmp);
if (rc) {
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 013a4bd65280..651759192280 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -61,8 +61,6 @@ struct cifs_sb_info {
/* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
char *prepath;
- /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
- uuid_t dfs_mount_id;
/*
* Indicate whether serverino option was turned off later
* (cifs_autodisable_serverino) in order to match new mounts.
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a99883f16d94..08a73dcb7786 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1233,6 +1233,7 @@ struct cifs_tcon {
/* BB add field for back pointer to sb struct(s)? */
#ifdef CONFIG_CIFS_DFS_UPCALL
struct list_head ulist; /* cache update list */
+ struct list_head dfs_ses_list;
#endif
struct delayed_work query_interfaces; /* query interfaces workqueue job */
};
@@ -1749,9 +1750,8 @@ struct cifs_mount_ctx {
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- struct cifs_ses *root_ses;
- uuid_t mount_id;
char *origin_fullpath, *leaf_fullpath;
+ struct list_head dfs_ses_list;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5233f14f0636..0eceddde7140 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2229,6 +2229,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
* need to lock before changing something in the session.
*/
spin_lock(&cifs_tcp_ses_lock);
+ ses->dfs_root_ses = ctx->dfs_root_ses;
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -3407,7 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
bool isdfs;
int rc;
- uuid_gen(&mnt_ctx.mount_id);
+ INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
+
rc = dfs_mount_share(&mnt_ctx, &isdfs);
if (rc)
goto error;
@@ -3427,7 +3429,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
kfree(cifs_sb->prepath);
cifs_sb->prepath = ctx->prepath;
ctx->prepath = NULL;
- uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
out:
cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
@@ -3439,7 +3440,7 @@ out:
return rc;
error:
- dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
+ dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
kfree(mnt_ctx.origin_fullpath);
kfree(mnt_ctx.leaf_fullpath);
cifs_mount_put_conns(&mnt_ctx);
@@ -3637,9 +3638,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
spin_unlock(&cifs_sb->tlink_tree_lock);
kfree(cifs_sb->prepath);
-#ifdef CONFIG_CIFS_DFS_UPCALL
- dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
-#endif
call_rcu(&cifs_sb->rcu, delayed_free);
}
diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c
index b64d20374b9c..c8bda52fa096 100644
--- a/fs/cifs/dfs.c
+++ b/fs/cifs/dfs.c
@@ -95,25 +95,31 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
ctx->leaf_fullpath = (char *)full_path;
rc = cifs_mount_get_session(mnt_ctx);
ctx->leaf_fullpath = NULL;
- if (!rc) {
- struct cifs_ses *ses = mnt_ctx->ses;
- mutex_lock(&ses->session_mutex);
- ses->dfs_root_ses = mnt_ctx->root_ses;
- mutex_unlock(&ses->session_mutex);
- }
return rc;
}
-static void set_root_ses(struct cifs_mount_ctx *mnt_ctx)
+static int get_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
{
- if (mnt_ctx->ses) {
+ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+ struct dfs_root_ses *root_ses;
+ struct cifs_ses *ses = mnt_ctx->ses;
+
+ if (ses) {
+ root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
+ if (!root_ses)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&root_ses->list);
+
spin_lock(&cifs_tcp_ses_lock);
- mnt_ctx->ses->ses_count++;
+ ses->ses_count++;
spin_unlock(&cifs_tcp_ses_lock);
- dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses);
+ root_ses->ses = ses;
+ list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
}
- mnt_ctx->root_ses = mnt_ctx->ses;
+ ctx->dfs_root_ses = ses;
+ return 0;
}
static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path,
@@ -121,7 +127,8 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
{
struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct dfs_info3_param ref = {};
- int rc;
+ bool is_refsrv = false;
+ int rc, rc2;
rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref);
if (rc)
@@ -136,8 +143,7 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
if (rc)
goto out;
- if (ref.flags & DFSREF_REFERRAL_SERVER)
- set_root_ses(mnt_ctx);
+ is_refsrv = !!(ref.flags & DFSREF_REFERRAL_SERVER);
rc = -EREMOTE;
if (ref.flags & DFSREF_STORAGE_SERVER) {
@@ -146,13 +152,17 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
goto out;
/* some servers may not advertise referral capability under ref.flags */
- if (!(ref.flags & DFSREF_REFERRAL_SERVER) &&
- is_tcon_dfs(mnt_ctx->tcon))
- set_root_ses(mnt_ctx);
+ is_refsrv |= is_tcon_dfs(mnt_ctx->tcon);
rc = cifs_is_path_remote(mnt_ctx);
}
+ if (rc == -EREMOTE && is_refsrv) {
+ rc2 = get_root_smb_session(mnt_ctx);
+ if (rc2)
+ rc = rc2;
+ }
+
out:
free_dfs_info_param(&ref);
return rc;
@@ -165,6 +175,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
char *ref_path = NULL, *full_path = NULL;
struct dfs_cache_tgt_iterator *tit;
struct TCP_Server_Info *server;
+ struct cifs_tcon *tcon;
char *origin_fullpath = NULL;
int num_links = 0;
int rc;
@@ -234,12 +245,22 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
if (!rc) {
server = mnt_ctx->server;
+ tcon = mnt_ctx->tcon;
mutex_lock(&server->refpath_lock);
- server->origin_fullpath = origin_fullpath;
- server->current_fullpath = server->leaf_fullpath;
+ if (!server->origin_fullpath) {
+ server->origin_fullpath = origin_fullpath;
+ server->current_fullpath = server->leaf_fullpath;
+ origin_fullpath = NULL;
+ }
mutex_unlock(&server->refpath_lock);
- origin_fullpath = NULL;
+
+ if (list_empty(&tcon->dfs_ses_list)) {
+ list_replace_init(&mnt_ctx->dfs_ses_list,
+ &tcon->dfs_ses_list);
+ } else {
+ dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
+ }
}
out:
@@ -260,7 +281,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
rc = get_session(mnt_ctx, NULL);
if (rc)
return rc;
- mnt_ctx->root_ses = mnt_ctx->ses;
+ ctx->dfs_root_ses = mnt_ctx->ses;
/*
* If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
* try to get an DFS referral (even cached) to determine whether it is an DFS mount.
@@ -280,7 +301,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
}
*isdfs = true;
- set_root_ses(mnt_ctx);
+ rc = get_root_smb_session(mnt_ctx);
+ if (rc)
+ return rc;
return __dfs_mount_share(mnt_ctx);
}
diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h
index 344bea6d8bab..13f26e01f7b9 100644
--- a/fs/cifs/dfs.h
+++ b/fs/cifs/dfs.h
@@ -10,6 +10,11 @@
#include "fs_context.h"
#include "cifs_unicode.h"
+struct dfs_root_ses {
+ struct list_head list;
+ struct cifs_ses *ses;
+};
+
int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
struct smb3_fs_context *ctx);
int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
@@ -22,9 +27,10 @@ static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path)
static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path,
struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl)
{
+ struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
- return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+ return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
cifs_remap(cifs_sb), path, ref, tl);
}
@@ -43,4 +49,15 @@ static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page)
true);
}
+static inline void dfs_put_root_smb_sessions(struct list_head *head)
+{
+ struct dfs_root_ses *root, *tmp;
+
+ list_for_each_entry_safe(root, tmp, head, list) {
+ list_del_init(&root->list);
+ cifs_put_smb_ses(root->ses);
+ kfree(root);
+ }
+}
+
#endif /* _CIFS_DFS_H */
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index ac86bd0ebd63..1c59811bfa73 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -49,17 +49,6 @@ struct cache_entry {
struct cache_dfs_tgt *tgthint;
};
-/* List of referral server sessions per dfs mount */
-struct mount_group {
- struct list_head list;
- uuid_t id;
- struct cifs_ses *sessions[CACHE_MAX_ENTRIES];
- int num_sessions;
- spinlock_t lock;
- struct list_head refresh_list;
- struct kref refcount;
-};
-
static struct kmem_cache *cache_slab __read_mostly;
static struct workqueue_struct *dfscache_wq __read_mostly;
@@ -76,85 +65,10 @@ static atomic_t cache_count;
static struct hlist_head cache_htable[CACHE_HTABLE_SIZE];
static DECLARE_RWSEM(htable_rw_lock);
-static LIST_HEAD(mount_group_list);
-static DEFINE_MUTEX(mount_group_list_lock);
-
static void refresh_cache_worker(struct work_struct *work);
static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker);
-static void __mount_group_release(struct mount_group *mg)
-{
- int i;
-
- for (i = 0; i < mg->num_sessions; i++)
- cifs_put_smb_ses(mg->sessions[i]);
- kfree(mg);
-}
-
-static void mount_group_release(struct kref *kref)
-{
- struct mount_group *mg = container_of(kref, struct mount_group, refcount);
-
- mutex_lock(&mount_group_list_lock);
- list_del(&mg->list);
- mutex_unlock(&mount_group_list_lock);
- __mount_group_release(mg);
-}
-
-static struct mount_group *find_mount_group_locked(const uuid_t *id)
-{
- struct mount_group *mg;
-
- list_for_each_entry(mg, &mount_group_list, list) {
- if (uuid_equal(&mg->id, id))
- return mg;
- }
- return ERR_PTR(-ENOENT);
-}
-
-static struct mount_group *__get_mount_group_locked(const uuid_t *id)
-{
- struct mount_group *mg;
-
- mg = find_mount_group_locked(id);
- if (!IS_ERR(mg))
- return mg;
-
- mg = kmalloc(sizeof(*mg), GFP_KERNEL);
- if (!mg)
- return ERR_PTR(-ENOMEM);
- kref_init(&mg->refcount);
- uuid_copy(&mg->id, id);
- mg->num_sessions = 0;
- spin_lock_init(&mg->lock);
- list_add(&mg->list, &mount_group_list);
- return mg;
-}
-
-static struct mount_group *get_mount_group(const uuid_t *id)
-{
- struct mount_group *mg;
-
- mutex_lock(&mount_group_list_lock);
- mg = __get_mount_group_locked(id);
- if (!IS_ERR(mg))
- kref_get(&mg->refcount);
- mutex_unlock(&mount_group_list_lock);
-
- return mg;
-}
-
-static void free_mount_group_list(void)
-{
- struct mount_group *mg, *tmp_mg;
-
- list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) {
- list_del_init(&mg->list);
- __mount_group_release(mg);
- }
-}
-
/**
* dfs_cache_canonical_path - get a canonical DFS path
*
@@ -704,7 +618,6 @@ void dfs_cache_destroy(void)
{
cancel_delayed_work_sync(&refresh_task);
unload_nls(cache_cp);
- free_mount_group_list();
flush_cache_ents();
kmem_cache_destroy(cache_slab);
destroy_workqueue(dfscache_wq);
@@ -1111,54 +1024,6 @@ out_unlock:
return rc;
}
-/**
- * dfs_cache_add_refsrv_session - add SMB session of referral server
- *
- * @mount_id: mount group uuid to lookup.
- * @ses: reference counted SMB session of referral server.
- */
-void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses)
-{
- struct mount_group *mg;
-
- if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses))
- return;
-
- mg = get_mount_group(mount_id);
- if (WARN_ON_ONCE(IS_ERR(mg)))
- return;
-
- spin_lock(&mg->lock);
- if (mg->num_sessions < ARRAY_SIZE(mg->sessions))
- mg->sessions[mg->num_sessions++] = ses;
- spin_unlock(&mg->lock);
- kref_put(&mg->refcount, mount_group_release);
-}
-
-/**
- * dfs_cache_put_refsrv_sessions - put all referral server sessions
- *
- * Put all SMB sessions from the given mount group id.
- *
- * @mount_id: mount group uuid to lookup.
- */
-void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
-{
- struct mount_group *mg;
-
- if (!mount_id || uuid_is_null(mount_id))
- return;
-
- mutex_lock(&mount_group_list_lock);
- mg = find_mount_group_locked(mount_id);
- if (IS_ERR(mg)) {
- mutex_unlock(&mount_group_list_lock);
- return;
- }
- mutex_unlock(&mount_group_list_lock);
- kref_put(&mg->refcount, mount_group_release);
-}
-
/* Extract share from DFS target and return a pointer to prefix path or NULL */
static const char *parse_target_share(const char *target, char **share)
{
@@ -1384,11 +1249,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
cifs_dbg(FYI, "%s: not a dfs mount\n", __func__);
return 0;
}
-
- if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
- cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__);
- return -EINVAL;
- }
/*
* After reconnecting to a different server, unique ids won't match anymore, so we disable
* serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index be3b5a44cf82..e0d39393035a 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -40,8 +40,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter
struct dfs_info3_param *ref);
int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share,
char **prefix);
-void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
-void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 44cb5639ed3b..1b8d4e27f831 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -265,6 +265,7 @@ struct smb3_fs_context {
bool rootfs:1; /* if it's a SMB root file system */
bool witness:1; /* use witness protocol */
char *leaf_fullpath;
+ struct cifs_ses *dfs_root_ses;
};
extern const struct fs_parameter_spec smb3_fs_parameters[];
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a0d286ee723d..b44fb51968bf 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -22,6 +22,7 @@
#ifdef CONFIG_CIFS_DFS_UPCALL
#include "dns_resolve.h"
#include "dfs_cache.h"
+#include "dfs.h"
#endif
#include "fs_context.h"
#include "cached_dir.h"
@@ -134,6 +135,9 @@ tconInfoAlloc(void)
spin_lock_init(&ret_buf->stat_lock);
atomic_set(&ret_buf->num_local_opens, 0);
atomic_set(&ret_buf->num_remote_opens, 0);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
+#endif
return ret_buf;
}
@@ -149,6 +153,9 @@ tconInfoFree(struct cifs_tcon *tcon)
atomic_dec(&tconInfoAllocCount);
kfree(tcon->nativeFileSystem);
kfree_sensitive(tcon->password);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
+#endif
kfree(tcon);
}
@@ -1255,6 +1262,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
* removing cached DFS targets that the client would eventually
* need during failover.
*/
+ ses = CIFS_DFS_ROOT_SES(ses);
if (ses->server->ops->get_dfs_refer &&
!ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs,
&num_refs, cifs_sb->local_nls,
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 9b956294e864..8dd3791b5c53 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -234,15 +234,32 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
size[0] = 8; /* sizeof __le64 */
data[0] = ptr;
- rc = SMB2_set_info_init(tcon, server,
- &rqst[num_rqst], COMPOUND_FID,
- COMPOUND_FID, current->tgid,
- FILE_END_OF_FILE_INFORMATION,
- SMB2_O_INFO_FILE, 0, data, size);
+ if (cfile) {
+ rc = SMB2_set_info_init(tcon, server,
+ &rqst[num_rqst],
+ cfile->fid.persistent_fid,
+ cfile->fid.volatile_fid,
+ current->tgid,
+ FILE_END_OF_FILE_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ data, size);
+ } else {
+ rc = SMB2_set_info_init(tcon, server,
+ &rqst[num_rqst],
+ COMPOUND_FID,
+ COMPOUND_FID,
+ current->tgid,
+ FILE_END_OF_FILE_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ data, size);
+ if (!rc) {
+ smb2_set_next_command(tcon, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst]);
+ }
+ }
if (rc)
goto finished;
- smb2_set_next_command(tcon, &rqst[num_rqst]);
- smb2_set_related(&rqst[num_rqst++]);
+ num_rqst++;
trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path);
break;
case SMB2_OP_SET_INFO:
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 381babc1212c..d827b7547ffa 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -425,7 +425,7 @@ generate_smb3signingkey(struct cifs_ses *ses,
/* safe to access primary channel, since it will never go away */
spin_lock(&ses->chan_lock);
- memcpy(ses->chans[0].signkey, ses->smb3signingkey,
+ memcpy(ses->chans[chan_index].signkey, ses->smb3signingkey,
SMB3_SIGN_KEY_SIZE);
spin_unlock(&ses->chan_lock);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b42050c68e6c..24bdd5f4d3bc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -278,7 +278,7 @@ static int
__smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
struct smb_rqst *rqst)
{
- int rc = 0;
+ int rc;
struct kvec *iov;
int n_vec;
unsigned int send_length = 0;
@@ -289,6 +289,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
struct msghdr smb_msg = {};
__be32 rfc1002_marker;
+ cifs_in_send_inc(server);
if (cifs_rdma_enabled(server)) {
/* return -EAGAIN when connecting or reconnecting */
rc = -EAGAIN;
@@ -297,14 +298,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
goto smbd_done;
}
+ rc = -EAGAIN;
if (ssocket == NULL)
- return -EAGAIN;
+ goto out;
+ rc = -ERESTARTSYS;
if (fatal_signal_pending(current)) {
cifs_dbg(FYI, "signal pending before send request\n");
- return -ERESTARTSYS;
+ goto out;
}
+ rc = 0;
/* cork the socket */
tcp_sock_set_cork(ssocket->sk, true);
@@ -407,7 +411,8 @@ smbd_done:
rc);
else if (rc > 0)
rc = 0;
-
+out:
+ cifs_in_send_dec(server);
return rc;
}
@@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
* I/O response may come back and free the mid entry on another thread.
*/
cifs_save_when_sent(mid);
- cifs_in_send_inc(server);
rc = smb_send_rqst(server, 1, rqst, flags);
- cifs_in_send_dec(server);
if (rc < 0) {
revert_current_mid(server, mid->credits);
@@ -1144,9 +1147,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
else
midQ[i]->callback = cifs_compound_last_callback;
}
- cifs_in_send_inc(server);
rc = smb_send_rqst(server, num_rqst, rqst, flags);
- cifs_in_send_dec(server);
for (i = 0; i < num_rqst; i++)
cifs_save_when_sent(midQ[i]);
@@ -1396,9 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
midQ->mid_state = MID_REQUEST_SUBMITTED;
- cifs_in_send_inc(server);
rc = smb_send(server, in_buf, len);
- cifs_in_send_dec(server);
cifs_save_when_sent(midQ);
if (rc < 0)
@@ -1539,9 +1538,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
}
midQ->mid_state = MID_REQUEST_SUBMITTED;
- cifs_in_send_inc(server);
rc = smb_send(server, in_buf, len);
- cifs_in_send_dec(server);
cifs_save_when_sent(midQ);
if (rc < 0)
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 78086f8dbda5..13d336a6cc5d 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -92,6 +92,8 @@ void fscrypt_put_master_key_activeref(struct super_block *sb,
* destroying any subkeys embedded in it.
*/
+ if (WARN_ON(!sb->s_master_keys))
+ return;
spin_lock(&sb->s_master_keys->lock);
hlist_del_rcu(&mk->mk_node);
spin_unlock(&sb->s_master_keys->lock);
@@ -207,10 +209,11 @@ static int allocate_filesystem_keyring(struct super_block *sb)
* Release all encryption keys that have been added to the filesystem, along
* with the keyring that contains them.
*
- * This is called at unmount time. The filesystem's underlying block device(s)
- * are still available at this time; this is important because after user file
- * accesses have been allowed, this function may need to evict keys from the
- * keyslots of an inline crypto engine, which requires the block device(s).
+ * This is called at unmount time, after all potentially-encrypted inodes have
+ * been evicted. The filesystem's underlying block device(s) are still
+ * available at this time; this is important because after user file accesses
+ * have been allowed, this function may need to evict keys from the keyslots of
+ * an inline crypto engine, which requires the block device(s).
*/
void fscrypt_destroy_keyring(struct super_block *sb)
{
@@ -227,12 +230,12 @@ void fscrypt_destroy_keyring(struct super_block *sb)
hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) {
/*
- * Since all inodes were already evicted, every key
- * remaining in the keyring should have an empty inode
- * list, and should only still be in the keyring due to
- * the single active ref associated with ->mk_secret.
- * There should be no structural refs beyond the one
- * associated with the active ref.
+ * Since all potentially-encrypted inodes were already
+ * evicted, every key remaining in the keyring should
+ * have an empty inode list, and should only still be in
+ * the keyring due to the single active ref associated
+ * with ->mk_secret. There should be no structural refs
+ * beyond the one associated with the active ref.
*/
WARN_ON(refcount_read(&mk->mk_active_refs) != 1);
WARN_ON(refcount_read(&mk->mk_struct_refs) != 1);
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index e16545849ea7..c08c0f578bc6 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -376,7 +376,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (bdev)
blksize_mask = bdev_logical_block_size(bdev) - 1;
else
- blksize_mask = (1 << inode->i_blkbits) - 1;
+ blksize_mask = i_blocksize(inode) - 1;
if ((iocb->ki_pos | iov_iter_count(to) |
iov_iter_alignment(to)) & blksize_mask)
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 091fd5adf818..d38e19c11270 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -47,7 +47,7 @@ void z_erofs_lzma_exit(void)
}
}
-int z_erofs_lzma_init(void)
+int __init z_erofs_lzma_init(void)
{
unsigned int i;
@@ -278,7 +278,7 @@ again:
}
}
if (no < nrpages_out && strm->buf.out)
- kunmap(rq->in[no]);
+ kunmap(rq->out[no]);
if (ni < nrpages_in)
kunmap(rq->in[ni]);
/* 4. push back LZMA stream context to the global list */
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 3f3561d37d1b..1db018f8c2e8 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -486,7 +486,7 @@ static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count)
void *erofs_get_pcpubuf(unsigned int requiredpages);
void erofs_put_pcpubuf(void *ptr);
int erofs_pcpubuf_growsize(unsigned int nrpages);
-void erofs_pcpubuf_init(void);
+void __init erofs_pcpubuf_init(void);
void erofs_pcpubuf_exit(void);
int erofs_register_sysfs(struct super_block *sb);
@@ -545,7 +545,7 @@ static inline int z_erofs_fill_inode(struct inode *inode) { return -EOPNOTSUPP;
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_ZIP_LZMA
-int z_erofs_lzma_init(void);
+int __init z_erofs_lzma_init(void);
void z_erofs_lzma_exit(void);
int z_erofs_load_lzma_config(struct super_block *sb,
struct erofs_super_block *dsb,
diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c
index a2efd833d1b6..c7a4b1d77069 100644
--- a/fs/erofs/pcpubuf.c
+++ b/fs/erofs/pcpubuf.c
@@ -114,7 +114,7 @@ out:
return ret;
}
-void erofs_pcpubuf_init(void)
+void __init erofs_pcpubuf_init(void)
{
int cpu;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 3247d2422bea..f1708c77a991 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1312,12 +1312,12 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
if (!be->decompressed_pages)
be->decompressed_pages =
- kcalloc(be->nr_pages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kvcalloc(be->nr_pages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
if (!be->compressed_pages)
be->compressed_pages =
- kcalloc(pclusterpages, sizeof(struct page *),
- GFP_KERNEL | __GFP_NOFAIL);
+ kvcalloc(pclusterpages, sizeof(struct page *),
+ GFP_KERNEL | __GFP_NOFAIL);
z_erofs_parse_out_bvecs(be);
err2 = z_erofs_parse_in_bvecs(be, &overlapped);
@@ -1365,7 +1365,7 @@ out:
}
if (be->compressed_pages < be->onstack_pages ||
be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
- kfree(be->compressed_pages);
+ kvfree(be->compressed_pages);
z_erofs_fill_other_copies(be, err);
for (i = 0; i < be->nr_pages; ++i) {
@@ -1384,7 +1384,7 @@ out:
}
if (be->decompressed_pages != be->onstack_pages)
- kfree(be->decompressed_pages);
+ kvfree(be->decompressed_pages);
pcl->length = 0;
pcl->partial = true;
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 8bf6d30518b6..655da4d739cb 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -757,9 +757,6 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
err = z_erofs_do_map_blocks(inode, map, flags);
out:
trace_z_erofs_map_blocks_iter_exit(inode, map, flags, err);
-
- /* aggressively BUG_ON iff CONFIG_EROFS_FS_DEBUG is on */
- DBG_BUGON(err < 0 && err != -ENOMEM);
return err;
}
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4eeb02d456a9..08b29c289da4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1387,7 +1387,7 @@ struct ext4_super_block {
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */
- /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
+ /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 4493ef0c715e..cdf9bfe10137 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
+ if (keys[1].fmr_physical < keys[0].fmr_physical)
+ return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 2b42ececa46d..1602d74b5eeb 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
- ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d251d705c276..bf0b7dea4900 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4797,8 +4797,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ int err;
+
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- return ext4_find_inline_data_nolock(inode);
+ err = ext4_find_inline_data_nolock(inode);
+ if (!err && ext4_has_inline_data(inode))
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return err;
} else
EXT4_I(inode)->i_inline_off = 0;
return 0;
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 12435d61f09e..f9a430152063 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -431,6 +431,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0);
+ EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 94608b7df7e8..a5010b5b8a8c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1595,11 +1595,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data);
- if (has_inline_data) {
- if (inlined)
- *inlined = 1;
+ if (inlined)
+ *inlined = has_inline_data;
+ if (has_inline_data)
goto cleanup_and_exit;
- }
}
if ((namelen <= 2) && (name[0] == '.') &&
@@ -3646,7 +3645,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again
* before reset old inode info.
*/
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh);
if (!old.bh)
@@ -3813,9 +3813,20 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return retval;
}
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
- if (IS_ERR(old.bh))
- return PTR_ERR(old.bh);
+ /*
+ * We need to protect against old.inode directory getting converted
+ * from inline directory format into a normal one.
+ */
+ if (S_ISDIR(old.inode->i_mode))
+ inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
+
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
+ if (IS_ERR(old.bh)) {
+ retval = PTR_ERR(old.bh);
+ goto unlock_moved_dir;
+ }
+
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@@ -3872,16 +3883,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
goto end_rename;
}
- /*
- * We need to protect against old.inode directory getting
- * converted from inline directory format into a normal one.
- */
- inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
retval = ext4_rename_dir_prepare(handle, &old);
- if (retval) {
- inode_unlock(old.inode);
+ if (retval)
goto end_rename;
- }
}
/*
* If we're renaming a file within an inline_data dir and adding or
@@ -4013,12 +4017,15 @@ end_rename:
} else {
ext4_journal_stop(handle);
}
- if (old.dir_bh)
- inode_unlock(old.inode);
release_bh:
brelse(old.dir_bh);
brelse(old.bh);
brelse(new.bh);
+
+unlock_moved_dir:
+ if (S_ISDIR(old.inode->i_mode))
+ inode_unlock(old.inode);
+
return retval;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index beaec6d81074..1e4db96a04e6 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
static void io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
- struct page *page,
+ struct page *pagecache_page,
+ struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@@ -421,10 +422,11 @@ submit_and_retry:
}
if (io->io_bio == NULL)
io_submit_init_bio(io, bh);
- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+ bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
+ wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
}
@@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- io_submit_add_bh(io, inode,
- bounce_page ? bounce_page : page, bh);
+ io_submit_add_bh(io, inode, page, bounce_page, bh);
} while ((bh = bh->b_this_page) != head);
unlock:
unlock_page(page);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 88f7b8a88c76..f43e526112ae 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -5726,6 +5726,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
return journal_inode;
}
+static int ext4_journal_bmap(journal_t *journal, sector_t *block)
+{
+ struct ext4_map_blocks map;
+ int ret;
+
+ if (journal->j_inode == NULL)
+ return 0;
+
+ map.m_lblk = *block;
+ map.m_len = 1;
+ ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
+ if (ret <= 0) {
+ ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
+ "journal bmap failed: block %llu ret %d\n",
+ *block, ret);
+ jbd2_journal_abort(journal, ret ? ret : -EIO);
+ return ret;
+ }
+ *block = map.m_pblk;
+ return 0;
+}
+
static journal_t *ext4_get_journal(struct super_block *sb,
unsigned int journal_inum)
{
@@ -5746,6 +5768,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
return NULL;
}
journal->j_private = sb;
+ journal->j_bmap = ext4_journal_bmap;
ext4_init_journal_params(sb, journal);
return journal;
}
@@ -5920,6 +5943,7 @@ static int ext4_load_journal(struct super_block *sb,
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
@@ -5928,6 +5952,14 @@ static int ext4_load_journal(struct super_block *sb,
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
kfree(save);
+ es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
+ EXT4_ERROR_FS);
+ /* Write out restored error information to the superblock */
+ if (!bdev_read_only(sb->s_bdev)) {
+ int err2;
+ err2 = ext4_commit_super(sb);
+ err = err ? : err2;
+ }
}
if (err) {
@@ -6157,11 +6189,13 @@ static int ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
- ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
- ext4_commit_super(sb);
+ j_errno = ext4_commit_super(sb);
+ if (j_errno)
+ return j_errno;
+ ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index e2b8b3437c58..12d6252e3e22 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -501,13 +501,13 @@ static const struct sysfs_ops ext4_attr_ops = {
.store = ext4_attr_store,
};
-static struct kobj_type ext4_sb_ktype = {
+static const struct kobj_type ext4_sb_ktype = {
.default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
-static struct kobj_type ext4_feat_ktype = {
+static const struct kobj_type ext4_feat_ktype = {
.default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_feat_release,
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 62f2ec599218..767454d74cd6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -2852,6 +2852,9 @@ shift:
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
+ if (ext4_has_inline_data(inode))
+ error = ext4_find_inline_data_nolock(inode);
+
cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
diff --git a/fs/file.c b/fs/file.c
index c942c89ca4cd..7893ea161d77 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -642,6 +642,7 @@ static struct file *pick_file(struct files_struct *files, unsigned fd)
if (fd >= fdt->max_fds)
return NULL;
+ fd = array_index_nospec(fd, fdt->max_fds);
file = fdt->fd[fd];
if (file) {
rcu_assign_pointer(fdt->fd[fd], NULL);
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 6fe9ca253b70..2e215e8c3c88 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -83,8 +83,26 @@ static int gfs2_dhash(const struct dentry *dentry, struct qstr *str)
return 0;
}
+static int gfs2_dentry_delete(const struct dentry *dentry)
+{
+ struct gfs2_inode *ginode;
+
+ if (d_really_is_negative(dentry))
+ return 0;
+
+ ginode = GFS2_I(d_inode(dentry));
+ if (!gfs2_holder_initialized(&ginode->i_iopen_gh))
+ return 0;
+
+ if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
+ return 1;
+
+ return 0;
+}
+
const struct dentry_operations gfs2_dops = {
.d_revalidate = gfs2_drevalidate,
.d_hash = gfs2_dhash,
+ .d_delete = gfs2_dentry_delete,
};
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e80c781731f8..8ae419152ff6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -969,10 +969,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
{
int err = 0;
unsigned long long ret;
- sector_t block = 0;
+ sector_t block = blocknr;
- if (journal->j_inode) {
- block = blocknr;
+ if (journal->j_bmap) {
+ err = journal->j_bmap(journal, &block);
+ if (err == 0)
+ *retp = block;
+ } else if (journal->j_inode) {
ret = bmap(journal->j_inode, &block);
if (ret || !block) {
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 7df6324ccb8a..8161667c976f 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
u32 exclusive;
int error;
__be32 *p;
- s32 end;
memset(lock, 0, sizeof(*lock));
locks_init_lock(fl);
@@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
p = xdr_decode_hyper(p, &l_offset);
xdr_decode_hyper(p, &l_len);
- end = l_offset + l_len - 1;
-
- fl->fl_start = (loff_t)l_offset;
- if (l_len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = (loff_t)end;
+ nlm4svc_set_file_lock_range(fl, l_offset, l_len);
error = 0;
out:
return error;
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 712fdfeb8ef0..5fcbf30cd275 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -33,6 +33,17 @@ loff_t_to_s64(loff_t offset)
return res;
}
+void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len)
+{
+ s64 end = off + len - 1;
+
+ fl->fl_start = off;
+ if (len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = end;
+}
+
/*
* NLM file handles are defined by specification to be a variable-length
* XDR opaque no longer than 1024 bytes. However, this implementation
@@ -80,7 +91,7 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
locks_init_lock(fl);
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK;
-
+ nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len);
return true;
}
diff --git a/fs/locks.c b/fs/locks.c
index 66b4eef09db5..df8b26a42524 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1863,9 +1863,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
void **priv)
{
struct inode *inode = file_inode(filp);
+ vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode);
int error;
- if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
+ if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE))
return -EACCES;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@@ -2425,7 +2426,6 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
if (flock->l_pid != 0)
goto out;
- cmd = F_GETLK64;
fl->fl_flags |= FL_OFDLCK;
fl->fl_owner = filp;
}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 14a72224b657..450d6c3bc05e 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -75,7 +75,7 @@ config NFS_V3_ACL
config NFS_V4
tristate "NFS client support for NFS version 4"
depends on NFS_FS
- select SUNRPC_GSS
+ select RPCSEC_GSS_KRB5
select KEYS
help
This option enables support for version 4 of the NFS protocol
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a41c3ee4549c..6fbcbb8d6587 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -3089,7 +3089,6 @@ static void nfs_access_add_rbtree(struct inode *inode,
else
goto found;
}
- set->timestamp = ktime_get_ns();
rb_link_node(&set->rb_node, parent, p);
rb_insert_color(&set->rb_node, root_node);
list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
@@ -3114,6 +3113,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
cache->fsgid = cred->fsgid;
cache->group_info = get_group_info(cred->group_info);
cache->mask = set->mask;
+ cache->timestamp = ktime_get_ns();
/* The above field assignments must be visible
* before this item appears on the lru. We cannot easily
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c380cff4108e..e90988591df4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -15,6 +15,7 @@
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
@@ -337,6 +338,7 @@ int nfs_read_folio(struct file *file, struct folio *folio)
trace_nfs_aop_readpage(inode, folio);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+ task_io_account_read(folio_size(folio));
/*
* Try to flush any pending writes to the file..
@@ -393,6 +395,7 @@ void nfs_readahead(struct readahead_control *ractl)
trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ task_io_account_read(readahead_length(ractl));
ret = -ESTALE;
if (NFS_STALE(inode))
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7c441f2bd444..43b88eaf0673 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -73,7 +73,7 @@ config NFSD_V4
bool "NFS server support for NFS version 4"
depends on NFSD && PROC_FS
select FS_POSIX_ACL
- select SUNRPC_GSS
+ select RPCSEC_GSS_KRB5
select CRYPTO
select CRYPTO_MD5
select CRYPTO_SHA256
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index e7462b5e5f1e..5783209f17fc 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -941,8 +941,15 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct page *last_page;
last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
- for (page += offset / PAGE_SIZE; page <= last_page; page++)
+ for (page += offset / PAGE_SIZE; page <= last_page; page++) {
+ /*
+ * Skip page replacement when extending the contents
+ * of the current page.
+ */
+ if (page == *(rqstp->rq_next_page - 1))
+ continue;
svc_rqst_replace_page(rqstp, page);
+ }
if (rqstp->rq_res.page_len == 0) // first call
rqstp->rq_res.page_base = offset % PAGE_SIZE;
rqstp->rq_res.page_len += sd->len;
@@ -1104,7 +1111,9 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
since = READ_ONCE(file->f_wb_err);
if (verf)
nfsd_copy_write_verifier(verf, nn);
+ file_start_write(file);
host_err = vfs_iter_write(file, &iter, &pos, flags);
+ file_end_write(file);
if (host_err < 0) {
nfsd_reset_write_verifier(nn);
trace_nfsd_writeverf_reset(nn, rqstp, host_err);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1d65f6ef00ca..0394505fdce3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
}
if (unlikely(copied < len) && wc->w_target_page) {
+ loff_t new_isize;
+
if (!PageUptodate(wc->w_target_page))
copied = 0;
- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
- start+len);
+ new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
+ if (new_isize > page_offset(wc->w_target_page))
+ ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+ start+len);
+ else {
+ /*
+ * When page is fully beyond new isize (data copy
+ * failed), do not bother zeroing the page. Invalidate
+ * it instead so that writeback does not get confused
+ * put page & buffer dirty bits into inconsistent
+ * state.
+ */
+ block_invalidate_folio(page_folio(wc->w_target_page),
+ 0, PAGE_SIZE);
+ }
}
if (wc->w_target_page)
flush_dcache_page(wc->w_target_page);
diff --git a/fs/splice.c b/fs/splice.c
index 2e76dbb81a8f..2c3dec2b6dfa 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -937,7 +937,6 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
/*
* Do the splice.
*/
- ret = 0;
bytes = 0;
len = sd->total_len;
flags = sd->flags;
diff --git a/fs/super.c b/fs/super.c
index 84332d5cb817..04bc62ab7dfe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -475,13 +475,22 @@ void generic_shutdown_super(struct super_block *sb)
cgroup_writeback_umount();
- /* evict all inodes with zero refcount */
+ /* Evict all inodes with zero refcount. */
evict_inodes(sb);
- /* only nonzero refcount inodes can have marks */
+
+ /*
+ * Clean up and evict any inodes that still have references due
+ * to fsnotify or the security policy.
+ */
fsnotify_sb_delete(sb);
- fscrypt_destroy_keyring(sb);
security_sb_delete(sb);
+ /*
+ * Now that all potentially-encrypted inodes have been evicted,
+ * the fscrypt keyring can be destroyed.
+ */
+ fscrypt_destroy_keyring(sb);
+
if (sb->s_dio_done_wq) {
destroy_workqueue(sb->s_dio_done_wq);
sb->s_dio_done_wq = NULL;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index f7a9607c2b95..2210e5eb1ea0 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -193,7 +193,7 @@ static int udf_adinicb_writepage(struct folio *folio,
struct udf_inode_info *iinfo = UDF_I(inode);
BUG_ON(!PageLocked(page));
- memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr,
+ memcpy_from_page(iinfo->i_data + iinfo->i_lenEAttr, page, 0,
i_size_read(inode));
unlock_page(page);
mark_inode_dirty(inode);
@@ -241,6 +241,15 @@ static int udf_read_folio(struct file *file, struct folio *folio)
static void udf_readahead(struct readahead_control *rac)
{
+ struct udf_inode_info *iinfo = UDF_I(rac->mapping->host);
+
+ /*
+ * No readahead needed for in-ICB files and udf_get_block() would get
+ * confused for such file anyway.
+ */
+ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
+ return;
+
mpage_readahead(rac, udf_get_block);
}
@@ -407,6 +416,9 @@ static int udf_map_block(struct inode *inode, struct udf_map_rq *map)
int err;
struct udf_inode_info *iinfo = UDF_I(inode);
+ if (WARN_ON_ONCE(iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB))
+ return -EFSCORRUPTED;
+
map->oflags = 0;
if (!(map->iflags & UDF_MAP_CREATE)) {
struct kernel_lb_addr eloc;
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index e13db6507b38..7a0e3a84d370 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -8,7 +8,6 @@
#include "fsverity_private.h"
#include <linux/mount.h>
-#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
@@ -367,25 +366,27 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
goto out_drop_write;
err = enable_verity(filp, &arg);
- if (err)
- goto out_allow_write_access;
/*
- * Some pages of the file may have been evicted from pagecache after
- * being used in the Merkle tree construction, then read into pagecache
- * again by another process reading from the file concurrently. Since
- * these pages didn't undergo verification against the file digest which
- * fs-verity now claims to be enforcing, we have to wipe the pagecache
- * to ensure that all future reads are verified.
+ * We no longer drop the inode's pagecache after enabling verity. This
+ * used to be done to try to avoid a race condition where pages could be
+ * evicted after being used in the Merkle tree construction, then
+ * re-instantiated by a concurrent read. Such pages are unverified, and
+ * the backing storage could have filled them with different content, so
+ * they shouldn't be used to fulfill reads once verity is enabled.
+ *
+ * But, dropping the pagecache has a big performance impact, and it
+ * doesn't fully solve the race condition anyway. So for those reasons,
+ * and also because this race condition isn't very important relatively
+ * speaking (especially for small-ish files, where the chance of a page
+ * being used, evicted, *and* re-instantiated all while enabling verity
+ * is quite small), we no longer drop the inode's pagecache.
*/
- filemap_write_and_wait(inode->i_mapping);
- invalidate_inode_pages2(inode->i_mapping);
/*
* allow_write_access() is needed to pair with deny_write_access().
* Regardless, the filesystem won't allow writing to verity files.
*/
-out_allow_write_access:
allow_write_access(filp);
out_drop_write:
mnt_drop_write_file(filp);
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index f50e3b5b52c9..e2508222750b 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -387,15 +387,15 @@ EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
int __init fsverity_init_workqueue(void)
{
/*
- * Use an unbound workqueue to allow bios to be verified in parallel
- * even when they happen to complete on the same CPU. This sacrifices
- * locality, but it's worthwhile since hashing is CPU-intensive.
+ * Use a high-priority workqueue to prioritize verification work, which
+ * blocks reads from completing, over regular application tasks.
*
- * Also use a high-priority workqueue to prioritize verification work,
- * which blocks reads from completing, over regular application tasks.
+ * For performance reasons, don't use an unbound workqueue. Using an
+ * unbound workqueue for crypto operations causes excessive scheduler
+ * latency on ARM64.
*/
fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
- WQ_UNBOUND | WQ_HIGHPRI,
+ WQ_HIGHPRI,
num_online_cpus());
if (!fsverity_read_workqueue)
return -ENOMEM;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 41734202796f..2ef78aa1d3f6 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -449,15 +449,17 @@ xfs_prepare_ioend(
}
/*
- * If the page has delalloc blocks on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip up a later direct I/O read operation on the same region.
+ * If the folio has delalloc blocks on it, the caller is asking us to punch them
+ * out. If we don't, we can leave a stale delalloc mapping covered by a clean
+ * page that needs to be dirtied again before the delalloc mapping can be
+ * converted. This stale delalloc mapping can trip up a later direct I/O read
+ * operation on the same region.
*
- * We prevent this by truncating away the delalloc regions on the page. Because
+ * We prevent this by truncating away the delalloc regions on the folio. Because
* they are delalloc, we can do this without needing a transaction. Indeed - if
* we get ENOSPC errors, we have to be able to do this truncation without a
- * transaction as there is no space left for block reservation (typically why we
- * see a ENOSPC in writeback).
+ * transaction as there is no space left for block reservation (typically why
+ * we see a ENOSPC in writeback).
*/
static void
xfs_discard_folio(
@@ -475,8 +477,13 @@ xfs_discard_folio(
"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
folio, ip->i_ino, pos);
+ /*
+ * The end of the punch range is always the offset of the the first
+ * byte of the next folio. Hence the end offset is only dependent on the
+ * folio itself and not the start offset that is passed in.
+ */
error = xfs_bmap_punch_delalloc_range(ip, pos,
- round_up(pos, folio_size(folio)));
+ folio_pos(folio) + folio_size(folio));
if (error && !xfs_is_shutdown(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 7dc0db7f5a76..6abcc34fafd8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1321,15 +1321,14 @@ xfs_qm_quotacheck(
error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
NULL);
- if (error) {
- /*
- * The inode walk may have partially populated the dquot
- * caches. We must purge them before disabling quota and
- * tearing down the quotainfo, or else the dquots will leak.
- */
- xfs_qm_dqpurge_all(mp);
- goto error_return;
- }
+
+ /*
+ * On error, the inode walk may have partially populated the dquot
+ * caches. We must purge them before disabling quota and tearing down
+ * the quotainfo, or else the dquots will leak.
+ */
+ if (error)
+ goto error_purge;
/*
* We've made all the changes that we need to make incore. Flush them
@@ -1363,10 +1362,8 @@ xfs_qm_quotacheck(
* and turn quotaoff. The dquots won't be attached to any of the inodes
* at this point (because we intentionally didn't in dqget_noattach).
*/
- if (error) {
- xfs_qm_dqpurge_all(mp);
- goto error_return;
- }
+ if (error)
+ goto error_purge;
/*
* If one type of quotas is off, then it will lose its
@@ -1376,7 +1373,7 @@ xfs_qm_quotacheck(
mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
mp->m_qflags |= flags;
- error_return:
+error_return:
xfs_buf_delwri_cancel(&buffer_list);
if (error) {
@@ -1395,6 +1392,21 @@ xfs_qm_quotacheck(
} else
xfs_notice(mp, "Quotacheck: Done.");
return error;
+
+error_purge:
+ /*
+ * On error, we may have inodes queued for inactivation. This may try
+ * to attach dquots to the inode before running cleanup operations on
+ * the inode and this can race with the xfs_qm_destroy_quotainfo() call
+ * below that frees mp->m_quotainfo. To avoid this race, flush all the
+ * pending inodegc operations before we purge the dquots from memory,
+ * ensuring that background inactivation is idle whilst we turn off
+ * quotas.
+ */
+ xfs_inodegc_flush(mp);
+ xfs_qm_dqpurge_all(mp);
+ goto error_return;
+
}
/*
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 738b0e28d74b..617e4f9db42e 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -383,7 +383,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
struct block_device *bdev = inode->i_sb->s_bdev;
unsigned int max = bdev_max_zone_append_sectors(bdev);
struct bio *bio;
- ssize_t size;
+ ssize_t size = 0;
int nr_pages;
ssize_t ret;
@@ -426,7 +426,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
if (bio->bi_iter.bi_sector != wpsector) {
zonefs_warn(inode->i_sb,
"Corrupted write pointer %llu for zone at %llu\n",
- wpsector, z->z_sector);
+ bio->bi_iter.bi_sector, z->z_sector);
ret = -EIO;
}
}