summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c12
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/afs/inode.c8
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/super.c3
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/file.c57
-rw-r--r--fs/btrfs/inode.c58
-rw-r--r--fs/btrfs/qgroup.c88
-rw-r--r--fs/btrfs/tests/inode-tests.c12
-rw-r--r--fs/btrfs/tree-checker.c3
-rw-r--r--fs/btrfs/volumes.c8
-rw-r--r--fs/cifs/cifsacl.c1
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/smb2ops.c92
-rw-r--r--fs/cifs/smb2pdu.c71
-rw-r--r--fs/cifs/smb2pdu.h2
-rw-r--r--fs/cifs/transport.c4
-rw-r--r--fs/coredump.c3
-rw-r--r--fs/crypto/fname.c8
-rw-r--r--fs/crypto/fscrypt_private.h56
-rw-r--r--fs/crypto/hooks.c55
-rw-r--r--fs/crypto/keyring.c10
-rw-r--r--fs/crypto/keysetup.c44
-rw-r--r--fs/crypto/policy.c27
-rw-r--r--fs/efivarfs/inode.c2
-rw-r--r--fs/ext4/dir.c16
-rw-r--r--fs/ext4/ext4.h3
-rw-r--r--fs/ext4/namei.c13
-rw-r--r--fs/ext4/super.c4
-rw-r--r--fs/f2fs/dir.c10
-rw-r--r--fs/f2fs/f2fs.h2
-rw-r--r--fs/gfs2/glock.c1
-rw-r--r--fs/gfs2/glops.c17
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/inode.c42
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/io_uring.c118
-rw-r--r--fs/jbd2/journal.c34
-rw-r--r--fs/jbd2/transaction.c31
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/nfs/Kconfig9
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c27
-rw-r--r--fs/nfs/nfs42proc.c21
-rw-r--r--fs/nfs/nfs42xdr.c1
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c2
-rw-r--r--fs/nfs/pagelist.c36
-rw-r--r--fs/notify/fsnotify.c12
-rw-r--r--fs/proc/self.c7
-rw-r--r--fs/proc/task_mmu.c8
-rw-r--r--fs/seq_file.c57
-rw-r--r--fs/ubifs/dir.c28
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c8
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c16
-rw-r--r--fs/xfs/scrub/bmap.c8
-rw-r--r--fs/xfs/scrub/btree.c45
-rw-r--r--fs/xfs/scrub/dir.c21
-rw-r--r--fs/xfs/xfs_iomap.c29
-rw-r--r--fs/xfs/xfs_iwalk.c27
-rw-r--r--fs/xfs/xfs_mount.c11
-rw-r--r--fs/zonefs/super.c14
62 files changed, 877 insertions, 450 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index b177fd3b1eb3..be5768949cb1 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -655,6 +655,8 @@ const struct file_operations v9fs_cached_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = v9fs_file_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
};
@@ -667,6 +669,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
.mmap = v9fs_file_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
};
@@ -678,6 +682,8 @@ const struct file_operations v9fs_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = generic_file_readonly_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
};
@@ -690,6 +696,8 @@ const struct file_operations v9fs_file_operations_dotl = {
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
.mmap = generic_file_readonly_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
};
@@ -701,6 +709,8 @@ const struct file_operations v9fs_mmap_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = v9fs_mmap_file_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync,
};
@@ -713,5 +723,7 @@ const struct file_operations v9fs_mmap_file_operations_dotl = {
.lock = v9fs_file_lock_dotl,
.flock = v9fs_file_flock_dotl,
.mmap = v9fs_mmap_file_mmap,
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
.fsync = v9fs_file_fsync_dotl,
};
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 1bb5b9d7f0a2..9068d5578a26 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -823,6 +823,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
vp->cb_break_before = afs_calc_vnode_cb_break(vnode);
vp->vnode = vnode;
vp->put_vnode = true;
+ vp->speculative = true; /* vnode not locked */
}
}
}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fe8844b4bee..b0d7b892090d 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -294,6 +294,13 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
}
} else if (vp->scb.have_status) {
+ if (vp->dv_before + vp->dv_delta != vp->scb.status.data_version &&
+ vp->speculative)
+ /* Ignore the result of a speculative bulk status fetch
+ * if it splits around a modification op, thereby
+ * appearing to regress the data version.
+ */
+ goto out;
afs_apply_status(op, vp);
if (vp->scb.have_cb)
afs_apply_callback(op, vp);
@@ -305,6 +312,7 @@ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *v
}
}
+out:
write_sequnlock(&vnode->cb_lock);
if (vp->scb.have_status)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 14d5d75f4b6e..0d150a29e39e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -755,6 +755,7 @@ struct afs_vnode_param {
bool update_ctime:1; /* Need to update the ctime */
bool set_size:1; /* Must update i_size */
bool op_unlinked:1; /* True if file was unlinked by op */
+ bool speculative:1; /* T if speculative status fetch (no vnode lock) */
};
/*
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 6c5900df6aa5..e38bb1e7a4d2 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -230,6 +230,9 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
_enter(",%s", name);
+ if (fc->source)
+ return invalf(fc, "kAFS: Multiple sources not supported");
+
if (!name) {
printk(KERN_ERR "kAFS: no volume name specified\n");
return -EINVAL;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 0378933d163c..0b29bdb25105 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -878,7 +878,10 @@ struct btrfs_fs_info {
*/
struct ulist *qgroup_ulist;
- /* protect user change for quota operations */
+ /*
+ * Protect user change for quota operations. If a transaction is needed,
+ * it must be started before locking this lock.
+ */
struct mutex qgroup_ioctl_lock;
/* list of dirty qgroups to be written at next commit */
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 87355a38a654..4373da7bcc0d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
}
}
-static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
- const u64 start,
- const u64 len,
- struct extent_state **cached_state)
-{
- u64 search_start = start;
- const u64 end = start + len - 1;
-
- while (search_start < end) {
- const u64 search_len = end - search_start + 1;
- struct extent_map *em;
- u64 em_len;
- int ret = 0;
-
- em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
- if (IS_ERR(em))
- return PTR_ERR(em);
-
- if (em->block_start != EXTENT_MAP_HOLE)
- goto next;
-
- em_len = em->len;
- if (em->start < search_start)
- em_len -= search_start - em->start;
- if (em_len > search_len)
- em_len = search_len;
-
- ret = set_extent_bit(&inode->io_tree, search_start,
- search_start + em_len - 1,
- EXTENT_DELALLOC_NEW,
- NULL, cached_state, GFP_NOFS);
-next:
- search_start = extent_map_end(em);
- free_extent_map(em);
- if (ret)
- return ret;
- }
- return 0;
-}
-
/*
* after copy_from_user, pages need to be dirtied and we need to make
* sure holes are created between the current EOF and the start of
@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, cached);
- if (!btrfs_is_free_space_inode(inode)) {
- if (start_pos >= isize &&
- !(inode->flags & BTRFS_INODE_PREALLOC)) {
- /*
- * There can't be any extents following eof in this case
- * so just set the delalloc new bit for the range
- * directly.
- */
- extra_bits |= EXTENT_DELALLOC_NEW;
- } else {
- err = btrfs_find_new_delalloc_bytes(inode, start_pos,
- num_bytes, cached);
- if (err)
- return err;
- }
- }
-
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
extra_bits, cached);
if (err)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da58c58ef9aa..7e8d8169779d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
return 0;
}
+static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
+ const u64 start,
+ const u64 len,
+ struct extent_state **cached_state)
+{
+ u64 search_start = start;
+ const u64 end = start + len - 1;
+
+ while (search_start < end) {
+ const u64 search_len = end - search_start + 1;
+ struct extent_map *em;
+ u64 em_len;
+ int ret = 0;
+
+ em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
+ if (IS_ERR(em))
+ return PTR_ERR(em);
+
+ if (em->block_start != EXTENT_MAP_HOLE)
+ goto next;
+
+ em_len = em->len;
+ if (em->start < search_start)
+ em_len -= search_start - em->start;
+ if (em_len > search_len)
+ em_len = search_len;
+
+ ret = set_extent_bit(&inode->io_tree, search_start,
+ search_start + em_len - 1,
+ EXTENT_DELALLOC_NEW,
+ NULL, cached_state, GFP_NOFS);
+next:
+ search_start = extent_map_end(em);
+ free_extent_map(em);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
unsigned int extra_bits,
struct extent_state **cached_state)
{
WARN_ON(PAGE_ALIGNED(end));
+
+ if (start >= i_size_read(&inode->vfs_inode) &&
+ !(inode->flags & BTRFS_INODE_PREALLOC)) {
+ /*
+ * There can't be any extents following eof in this case so just
+ * set the delalloc new bit for the range directly.
+ */
+ extra_bits |= EXTENT_DELALLOC_NEW;
+ } else {
+ int ret;
+
+ ret = btrfs_find_new_delalloc_bytes(inode, start,
+ end + 1 - start,
+ cached_state);
+ if (ret)
+ return ret;
+ }
+
return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
cached_state);
}
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 77c54749f432..87bd37b70738 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -11,6 +11,7 @@
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>
+#include <linux/sched/mm.h>
#include "ctree.h"
#include "transaction.h"
@@ -497,13 +498,13 @@ next2:
break;
}
out:
+ btrfs_free_path(path);
fs_info->qgroup_flags |= flags;
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
ret >= 0)
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
- btrfs_free_path(path);
if (ret < 0) {
ulist_free(fs_info->qgroup_ulist);
@@ -936,6 +937,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
struct btrfs_key found_key;
struct btrfs_qgroup *qgroup = NULL;
struct btrfs_trans_handle *trans = NULL;
+ struct ulist *ulist = NULL;
int ret = 0;
int slot;
@@ -943,8 +945,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
if (fs_info->quota_root)
goto out;
- fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
- if (!fs_info->qgroup_ulist) {
+ ulist = ulist_alloc(GFP_KERNEL);
+ if (!ulist) {
ret = -ENOMEM;
goto out;
}
@@ -952,6 +954,22 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
ret = btrfs_sysfs_add_qgroups(fs_info);
if (ret < 0)
goto out;
+
+ /*
+ * Unlock qgroup_ioctl_lock before starting the transaction. This is to
+ * avoid lock acquisition inversion problems (reported by lockdep) between
+ * qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
+ * start a transaction.
+ * After we started the transaction lock qgroup_ioctl_lock again and
+ * check if someone else created the quota root in the meanwhile. If so,
+ * just return success and release the transaction handle.
+ *
+ * Also we don't need to worry about someone else calling
+ * btrfs_sysfs_add_qgroups() after we unlock and getting an error because
+ * that function returns 0 (success) when the sysfs entries already exist.
+ */
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
/*
* 1 for quota root item
* 1 for BTRFS_QGROUP_STATUS item
@@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
* would be a lot of overkill.
*/
trans = btrfs_start_transaction(tree_root, 2);
+
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}
+ if (fs_info->quota_root)
+ goto out;
+
+ fs_info->qgroup_ulist = ulist;
+ ulist = NULL;
+
/*
* initially create the quota tree
*/
@@ -1124,11 +1150,14 @@ out:
if (ret) {
ulist_free(fs_info->qgroup_ulist);
fs_info->qgroup_ulist = NULL;
- if (trans)
- btrfs_end_transaction(trans);
btrfs_sysfs_del_qgroups(fs_info);
}
mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (ret && trans)
+ btrfs_end_transaction(trans);
+ else if (trans)
+ ret = btrfs_end_transaction(trans);
+ ulist_free(ulist);
return ret;
}
@@ -1141,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
+ mutex_unlock(&fs_info->qgroup_ioctl_lock);
/*
* 1 For the root item
*
* We should also reserve enough items for the quota tree deletion in
* btrfs_clean_quota_tree but this is not done.
+ *
+ * Also, we must always start a transaction without holding the mutex
+ * qgroup_ioctl_lock, see btrfs_quota_enable().
*/
trans = btrfs_start_transaction(fs_info->tree_root, 1);
+
+ mutex_lock(&fs_info->qgroup_ioctl_lock);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ trans = NULL;
goto out;
}
+ if (!fs_info->quota_root)
+ goto out;
+
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
@@ -1167,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
ret = btrfs_clean_quota_tree(trans, quota_root);
if (ret) {
btrfs_abort_transaction(trans, ret);
- goto end_trans;
+ goto out;
}
ret = btrfs_del_root(trans, &quota_root->root_key);
if (ret) {
btrfs_abort_transaction(trans, ret);
- goto end_trans;
+ goto out;
}
list_del(&quota_root->dirty_list);
@@ -1185,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_put_root(quota_root);
-end_trans:
- ret = btrfs_end_transaction(trans);
out:
mutex_unlock(&fs_info->qgroup_ioctl_lock);
+ if (ret && trans)
+ btrfs_end_transaction(trans);
+ else if (trans)
+ ret = btrfs_end_transaction(trans);
+
return ret;
}
@@ -1324,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
struct btrfs_qgroup *member;
struct btrfs_qgroup_list *list;
struct ulist *tmp;
+ unsigned int nofs_flag;
int ret = 0;
/* Check the level of src and dst first */
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
return -EINVAL;
+ /* We hold a transaction handle open, must do a NOFS allocation. */
+ nofs_flag = memalloc_nofs_save();
tmp = ulist_alloc(GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
if (!tmp)
return -ENOMEM;
@@ -1387,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
struct btrfs_qgroup_list *list;
struct ulist *tmp;
bool found = false;
+ unsigned int nofs_flag;
int ret = 0;
int ret2;
+ /* We hold a transaction handle open, must do a NOFS allocation. */
+ nofs_flag = memalloc_nofs_save();
tmp = ulist_alloc(GFP_KERNEL);
+ memalloc_nofs_restore(nofs_flag);
if (!tmp)
return -ENOMEM;
@@ -3512,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
{
struct btrfs_trans_handle *trans;
int ret;
+ bool can_commit = true;
/*
* We don't want to run flush again and again, so if there is a running
@@ -3523,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
return 0;
}
+ /*
+ * If current process holds a transaction, we shouldn't flush, as we
+ * assume all space reservation happens before a transaction handle is
+ * held.
+ *
+ * But there are cases like btrfs_delayed_item_reserve_metadata() where
+ * we try to reserve space with one transction handle already held.
+ * In that case we can't commit transaction, but at least try to end it
+ * and hope the started data writes can free some space.
+ */
+ if (current->journal_info &&
+ current->journal_info != BTRFS_SEND_TRANS_STUB)
+ can_commit = false;
+
ret = btrfs_start_delalloc_snapshot(root);
if (ret < 0)
goto out;
@@ -3534,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root)
goto out;
}
- ret = btrfs_commit_transaction(trans);
+ if (can_commit)
+ ret = btrfs_commit_transaction(trans);
+ else
+ ret = btrfs_end_transaction(trans);
out:
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
wake_up(&root->qgroup_flush_wait);
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index e6719f7db386..04022069761d 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -983,7 +983,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1050,7 +1051,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1082,7 +1084,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_UPTODATE, 0, 0, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@@ -1097,7 +1100,8 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
out:
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
- EXTENT_DELALLOC | EXTENT_UPTODATE, 0, 0, NULL);
+ EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+ EXTENT_UPTODATE, 0, 0, NULL);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 8784b74f5232..ea2bb4cb5890 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1068,6 +1068,7 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
"invalid root item size, have %u expect %zu or %u",
btrfs_item_size_nr(leaf, slot), sizeof(ri),
btrfs_legacy_root_item_size());
+ return -EUCLEAN;
}
/*
@@ -1423,6 +1424,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
"invalid item size, have %u expect aligned to %zu for key type %u",
btrfs_item_size_nr(leaf, slot),
sizeof(*dref), key->type);
+ return -EUCLEAN;
}
if (!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize)) {
generic_err(leaf, slot,
@@ -1451,6 +1453,7 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
extent_err(leaf, slot,
"invalid extent data backref offset, have %llu expect aligned to %u",
offset, leaf->fs_info->sectorsize);
+ return -EUCLEAN;
}
}
return 0;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a6406b3b8c2b..78637665166e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -940,7 +940,13 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (device->bdev != path_bdev) {
bdput(path_bdev);
mutex_unlock(&fs_devices->device_list_mutex);
- btrfs_warn_in_rcu(device->fs_info,
+ /*
+ * device->fs_info may not be reliable here, so
+ * pass in a NULL instead. This avoids a
+ * possible use-after-free when the fs_info and
+ * fs_info->sb are already torn down.
+ */
+ btrfs_warn_in_rcu(NULL,
"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
path, devid, found_transid,
current->comm,
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 23b21e943652..ef4784e72b1d 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1266,6 +1266,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
} else if (mode_from_special_sid) {
rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, true);
+ kfree(pntsd);
} else {
/* get approximated mode from ACL */
rc = parse_sec_desc(cifs_sb, pntsd, acllen, fattr, false);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c38156f324dd..44f9cce57099 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -876,6 +876,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
+ cancel_delayed_work_sync(&server->echo);
+
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
@@ -4544,7 +4546,8 @@ static void set_root_ses(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
if (ses) {
spin_lock(&cifs_tcp_ses_lock);
ses->ses_count++;
- ses->tcon_ipc->remap = cifs_remap(cifs_sb);
+ if (ses->tcon_ipc)
+ ses->tcon_ipc->remap = cifs_remap(cifs_sb);
spin_unlock(&cifs_tcp_ses_lock);
}
*root_ses = ses;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 504766cb6c19..3d914d7d0d11 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -264,7 +264,7 @@ smb2_revert_current_mid(struct TCP_Server_Info *server, const unsigned int val)
}
static struct mid_q_entry *
-smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+__smb2_find_mid(struct TCP_Server_Info *server, char *buf, bool dequeue)
{
struct mid_q_entry *mid;
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
@@ -281,6 +281,10 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
(mid->mid_state == MID_REQUEST_SUBMITTED) &&
(mid->command == shdr->Command)) {
kref_get(&mid->refcount);
+ if (dequeue) {
+ list_del_init(&mid->qhead);
+ mid->mid_flags |= MID_DELETED;
+ }
spin_unlock(&GlobalMid_Lock);
return mid;
}
@@ -289,6 +293,18 @@ smb2_find_mid(struct TCP_Server_Info *server, char *buf)
return NULL;
}
+static struct mid_q_entry *
+smb2_find_mid(struct TCP_Server_Info *server, char *buf)
+{
+ return __smb2_find_mid(server, buf, false);
+}
+
+static struct mid_q_entry *
+smb2_find_dequeue_mid(struct TCP_Server_Info *server, char *buf)
+{
+ return __smb2_find_mid(server, buf, true);
+}
+
static void
smb2_dump_detail(void *buf, struct TCP_Server_Info *server)
{
@@ -3098,8 +3114,8 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
rqst[1].rq_nvec = SMB2_IOCTL_IOV_SIZE;
rc = SMB2_ioctl_init(tcon, server,
- &rqst[1], fid.persistent_fid,
- fid.volatile_fid, FSCTL_GET_REPARSE_POINT,
+ &rqst[1], COMPOUND_FID,
+ COMPOUND_FID, FSCTL_GET_REPARSE_POINT,
true /* is_fctl */, NULL, 0,
CIFSMaxBufSize -
MAX_SMB2_CREATE_RESPONSE_SIZE -
@@ -4356,7 +4372,8 @@ init_read_bvec(struct page **pages, unsigned int npages, unsigned int data_size,
static int
handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
char *buf, unsigned int buf_len, struct page **pages,
- unsigned int npages, unsigned int page_data_size)
+ unsigned int npages, unsigned int page_data_size,
+ bool is_offloaded)
{
unsigned int data_offset;
unsigned int data_len;
@@ -4378,7 +4395,8 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (server->ops->is_session_expired &&
server->ops->is_session_expired(buf)) {
- cifs_reconnect(server);
+ if (!is_offloaded)
+ cifs_reconnect(server);
return -1;
}
@@ -4402,7 +4420,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: server returned error %d\n",
__func__, rdata->result);
/* normal error on read response */
- dequeue_mid(mid, false);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_RECEIVED;
+ else
+ dequeue_mid(mid, false);
return 0;
}
@@ -4426,7 +4447,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: data offset (%u) beyond end of smallbuf\n",
__func__, data_offset);
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4442,21 +4466,30 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
cifs_dbg(FYI, "%s: data offset (%u) beyond 1st page of response\n",
__func__, data_offset);
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
if (data_len > page_data_size - pad_len) {
/* data_len is corrupt -- discard frame */
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
rdata->result = init_read_bvec(pages, npages, page_data_size,
cur_off, &bvec);
if (rdata->result != 0) {
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4471,7 +4504,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
/* read response payload cannot be in both buf and pages */
WARN_ONCE(1, "buf can not contain only a part of read data");
rdata->result = -EIO;
- dequeue_mid(mid, rdata->result);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_MALFORMED;
+ else
+ dequeue_mid(mid, rdata->result);
return 0;
}
@@ -4482,7 +4518,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
if (length < 0)
return length;
- dequeue_mid(mid, false);
+ if (is_offloaded)
+ mid->mid_state = MID_RESPONSE_RECEIVED;
+ else
+ dequeue_mid(mid, false);
return length;
}
@@ -4511,15 +4550,34 @@ static void smb2_decrypt_offload(struct work_struct *work)
}
dw->server->lstrp = jiffies;
- mid = smb2_find_mid(dw->server, dw->buf);
+ mid = smb2_find_dequeue_mid(dw->server, dw->buf);
if (mid == NULL)
cifs_dbg(FYI, "mid not found\n");
else {
mid->decrypted = true;
rc = handle_read_data(dw->server, mid, dw->buf,
dw->server->vals->read_rsp_size,
- dw->ppages, dw->npages, dw->len);
- mid->callback(mid);
+ dw->ppages, dw->npages, dw->len,
+ true);
+ if (rc >= 0) {
+#ifdef CONFIG_CIFS_STATS2
+ mid->when_received = jiffies;
+#endif
+ mid->callback(mid);
+ } else {
+ spin_lock(&GlobalMid_Lock);
+ if (dw->server->tcpStatus == CifsNeedReconnect) {
+ mid->mid_state = MID_RETRY_NEEDED;
+ spin_unlock(&GlobalMid_Lock);
+ mid->callback(mid);
+ } else {
+ mid->mid_state = MID_REQUEST_SUBMITTED;
+ mid->mid_flags &= ~(MID_DELETED);
+ list_add_tail(&mid->qhead,
+ &dw->server->pending_mid_q);
+ spin_unlock(&GlobalMid_Lock);
+ }
+ }
cifs_mid_q_entry_release(mid);
}
@@ -4622,7 +4680,7 @@ non_offloaded_decrypt:
(*mid)->decrypted = true;
rc = handle_read_data(server, *mid, buf,
server->vals->read_rsp_size,
- pages, npages, len);
+ pages, npages, len, false);
}
free_pages:
@@ -4765,7 +4823,7 @@ smb3_handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid)
char *buf = server->large_buf ? server->bigbuf : server->smallbuf;
return handle_read_data(server, mid, buf, server->pdu_size,
- NULL, 0, 0);
+ NULL, 0, 0, false);
}
static int
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 445e80862865..acb72705062d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2272,17 +2272,15 @@ static struct crt_sd_ctxt *
create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
{
struct crt_sd_ctxt *buf;
- struct cifs_ace *pace;
- unsigned int sdlen, acelen;
+ __u8 *ptr, *aclptr;
+ unsigned int acelen, acl_size, ace_count;
unsigned int owner_offset = 0;
unsigned int group_offset = 0;
+ struct smb3_acl acl;
- *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 2), 8);
+ *len = roundup(sizeof(struct crt_sd_ctxt) + (sizeof(struct cifs_ace) * 4), 8);
if (set_owner) {
- /* offset fields are from beginning of security descriptor not of create context */
- owner_offset = sizeof(struct smb3_acl) + (sizeof(struct cifs_ace) * 2);
-
/* sizeof(struct owner_group_sids) is already multiple of 8 so no need to round */
*len += sizeof(struct owner_group_sids);
}
@@ -2291,26 +2289,22 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
if (buf == NULL)
return buf;
+ ptr = (__u8 *)&buf[1];
if (set_owner) {
+ /* offset fields are from beginning of security descriptor not of create context */
+ owner_offset = ptr - (__u8 *)&buf->sd;
buf->sd.OffsetOwner = cpu_to_le32(owner_offset);
- group_offset = owner_offset + sizeof(struct owner_sid);
+ group_offset = owner_offset + offsetof(struct owner_group_sids, group);
buf->sd.OffsetGroup = cpu_to_le32(group_offset);
+
+ setup_owner_group_sids(ptr);
+ ptr += sizeof(struct owner_group_sids);
} else {
buf->sd.OffsetOwner = 0;
buf->sd.OffsetGroup = 0;
}
- sdlen = sizeof(struct smb3_sd) + sizeof(struct smb3_acl) +
- 2 * sizeof(struct cifs_ace);
- if (set_owner) {
- sdlen += sizeof(struct owner_group_sids);
- setup_owner_group_sids(owner_offset + sizeof(struct create_context) + 8 /* name */
- + (char *)buf);
- }
-
- buf->ccontext.DataOffset = cpu_to_le16(offsetof
- (struct crt_sd_ctxt, sd));
- buf->ccontext.DataLength = cpu_to_le32(sdlen);
+ buf->ccontext.DataOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, sd));
buf->ccontext.NameOffset = cpu_to_le16(offsetof(struct crt_sd_ctxt, Name));
buf->ccontext.NameLength = cpu_to_le16(4);
/* SMB2_CREATE_SD_BUFFER_TOKEN is "SecD" */
@@ -2319,6 +2313,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
buf->Name[2] = 'c';
buf->Name[3] = 'D';
buf->sd.Revision = 1; /* Must be one see MS-DTYP 2.4.6 */
+
/*
* ACL is "self relative" ie ACL is stored in contiguous block of memory
* and "DP" ie the DACL is present
@@ -2326,28 +2321,38 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len)
buf->sd.Control = cpu_to_le16(ACL_CONTROL_SR | ACL_CONTROL_DP);
/* offset owner, group and Sbz1 and SACL are all zero */
- buf->sd.OffsetDacl = cpu_to_le32(sizeof(struct smb3_sd));
- buf->acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
+ buf->sd.OffsetDacl = cpu_to_le32(ptr - (__u8 *)&buf->sd);
+ /* Ship the ACL for now. we will copy it into buf later. */
+ aclptr = ptr;
+ ptr += sizeof(struct cifs_acl);
/* create one ACE to hold the mode embedded in reserved special SID */
- pace = (struct cifs_ace *)(sizeof(struct crt_sd_ctxt) + (char *)buf);
- acelen = setup_special_mode_ACE(pace, (__u64)mode);
+ acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode);
+ ptr += acelen;
+ acl_size = acelen + sizeof(struct smb3_acl);
+ ace_count = 1;
if (set_owner) {
/* we do not need to reallocate buffer to add the two more ACEs. plenty of space */
- pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) + (char *)buf));
- acelen += setup_special_user_owner_ACE(pace);
- /* it does not appear necessary to add an ACE for the NFS group SID */
- buf->acl.AceCount = cpu_to_le16(3);
- } else
- buf->acl.AceCount = cpu_to_le16(2);
+ acelen = setup_special_user_owner_ACE((struct cifs_ace *)ptr);
+ ptr += acelen;
+ acl_size += acelen;
+ ace_count += 1;
+ }
/* and one more ACE to allow access for authenticated users */
- pace = (struct cifs_ace *)(acelen + (sizeof(struct crt_sd_ctxt) +
- (char *)buf));
- acelen += setup_authusers_ACE(pace);
-
- buf->acl.AclSize = cpu_to_le16(sizeof(struct cifs_acl) + acelen);
+ acelen = setup_authusers_ACE((struct cifs_ace *)ptr);
+ ptr += acelen;
+ acl_size += acelen;
+ ace_count += 1;
+
+ acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */
+ acl.AclSize = cpu_to_le16(acl_size);
+ acl.AceCount = cpu_to_le16(ace_count);
+ memcpy(aclptr, &acl, sizeof(struct cifs_acl));
+
+ buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd);
+ *len = ptr - (__u8 *)buf;
return buf;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index f05f9b12f689..fa57b03ca98c 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -963,8 +963,6 @@ struct crt_sd_ctxt {
struct create_context ccontext;
__u8 Name[8];
struct smb3_sd sd;
- struct smb3_acl acl;
- /* Followed by at least 4 ACEs */
} __packed;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index e27e255d40dd..36b2ece43403 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -339,8 +339,8 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
return -EAGAIN;
if (signal_pending(current)) {
- cifs_dbg(FYI, "signal is pending before sending any data\n");
- return -EINTR;
+ cifs_dbg(FYI, "signal pending before send request\n");
+ return -ERESTARTSYS;
}
/* cork the socket */
diff --git a/fs/coredump.c b/fs/coredump.c
index 0cd9056d79cc..c6acfc694f65 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -229,7 +229,8 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
*/
if (ispipe) {
if (isspace(*pat_ptr)) {
- was_space = true;
+ if (cn->used != 0)
+ was_space = true;
pat_ptr++;
continue;
} else if (was_space) {
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 1fbe6c24d705..988dadf7a94d 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -404,7 +404,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
fname->disk_name.len = iname->len;
return 0;
}
- ret = fscrypt_get_encryption_info(dir);
+ ret = fscrypt_get_encryption_info(dir, lookup);
if (ret)
return ret;
@@ -560,7 +560,11 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
dir = dget_parent(dentry);
- err = fscrypt_get_encryption_info(d_inode(dir));
+ /*
+ * Pass allow_unsupported=true, so that files with an unsupported
+ * encryption policy can be deleted.
+ */
+ err = fscrypt_get_encryption_info(d_inode(dir), true);
valid = !fscrypt_has_encryption_key(d_inode(dir));
dput(dir);
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 4f5806a3b73d..f0bed6b06fa6 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -25,6 +25,9 @@
#define FSCRYPT_CONTEXT_V1 1
#define FSCRYPT_CONTEXT_V2 2
+/* Keep this in sync with include/uapi/linux/fscrypt.h */
+#define FSCRYPT_MODE_MAX FSCRYPT_MODE_ADIANTUM
+
struct fscrypt_context_v1 {
u8 version; /* FSCRYPT_CONTEXT_V1 */
u8 contents_encryption_mode;
@@ -436,16 +439,9 @@ struct fscrypt_master_key {
* FS_IOC_REMOVE_ENCRYPTION_KEY can be retried, or
* FS_IOC_ADD_ENCRYPTION_KEY can add the secret again.
*
- * Locking: protected by key->sem (outer) and mk_secret_sem (inner).
- * The reason for two locks is that key->sem also protects modifying
- * mk_users, which ranks it above the semaphore for the keyring key
- * type, which is in turn above page faults (via keyring_read). But
- * sometimes filesystems call fscrypt_get_encryption_info() from within
- * a transaction, which ranks it below page faults. So we need a
- * separate lock which protects mk_secret but not also mk_users.
+ * Locking: protected by this master key's key->sem.
*/
struct fscrypt_master_key_secret mk_secret;
- struct rw_semaphore mk_secret_sem;
/*
* For v1 policy keys: an arbitrary key descriptor which was assigned by
@@ -464,8 +460,8 @@ struct fscrypt_master_key {
*
* This is NULL for v1 policy keys; those can only be added by root.
*
- * Locking: in addition to this keyrings own semaphore, this is
- * protected by the master key's key->sem, so we can do atomic
+ * Locking: in addition to this keyring's own semaphore, this is
+ * protected by this master key's key->sem, so we can do atomic
* search+insert. It can also be searched without taking any locks, but
* in that case the returned key may have already been removed.
*/
@@ -491,9 +487,9 @@ struct fscrypt_master_key {
* Per-mode encryption keys for the various types of encryption policies
* that use them. Allocated and derived on-demand.
*/
- struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1];
- struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1];
- struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[__FSCRYPT_MODE_MAX + 1];
+ struct fscrypt_prepared_key mk_direct_keys[FSCRYPT_MODE_MAX + 1];
+ struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[FSCRYPT_MODE_MAX + 1];
+ struct fscrypt_prepared_key mk_iv_ino_lblk_32_keys[FSCRYPT_MODE_MAX + 1];
/* Hash key for inode numbers. Initialized only when needed. */
siphash_key_t mk_ino_hash_key;
@@ -507,9 +503,9 @@ is_master_key_secret_present(const struct fscrypt_master_key_secret *secret)
/*
* The READ_ONCE() is only necessary for fscrypt_drop_inode() and
* fscrypt_key_describe(). These run in atomic context, so they can't
- * take ->mk_secret_sem and thus 'secret' can change concurrently which
- * would be a data race. But they only need to know whether the secret
- * *was* present at the time of check, so READ_ONCE() suffices.
+ * take the key semaphore and thus 'secret' can change concurrently
+ * which would be a data race. But they only need to know whether the
+ * secret *was* present at the time of check, so READ_ONCE() suffices.
*/
return READ_ONCE(secret->size) != 0;
}
@@ -575,6 +571,34 @@ int fscrypt_derive_dirhash_key(struct fscrypt_info *ci,
void fscrypt_hash_inode_number(struct fscrypt_info *ci,
const struct fscrypt_master_key *mk);
+int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported);
+
+/**
+ * fscrypt_require_key() - require an inode's encryption key
+ * @inode: the inode we need the key for
+ *
+ * If the inode is encrypted, set up its encryption key if not already done.
+ * Then require that the key be present and return -ENOKEY otherwise.
+ *
+ * No locks are needed, and the key will live as long as the struct inode --- so
+ * it won't go away from under you.
+ *
+ * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
+ * if a problem occurred while setting up the encryption key.
+ */
+static inline int fscrypt_require_key(struct inode *inode)
+{
+ if (IS_ENCRYPTED(inode)) {
+ int err = fscrypt_get_encryption_info(inode, false);
+
+ if (err)
+ return err;
+ if (!fscrypt_has_encryption_key(inode))
+ return -ENOKEY;
+ }
+ return 0;
+}
+
/* keysetup_v1.c */
void fscrypt_put_direct_key(struct fscrypt_direct_key *dk);
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 20b0df47fe6a..79570e0e8e61 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -54,15 +54,12 @@ EXPORT_SYMBOL_GPL(fscrypt_file_open);
int __fscrypt_prepare_link(struct inode *inode, struct inode *dir,
struct dentry *dentry)
{
- int err;
-
- err = fscrypt_require_key(dir);
- if (err)
- return err;
-
- /* ... in case we looked up no-key name before key was added */
- if (dentry->d_flags & DCACHE_NOKEY_NAME)
+ if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
+ /*
+ * We don't need to separately check that the directory inode's key is
+ * available, as it's implied by the dentry not being a no-key name.
+ */
if (!fscrypt_has_permitted_context(dir, inode))
return -EXDEV;
@@ -75,19 +72,13 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
- int err;
-
- err = fscrypt_require_key(old_dir);
- if (err)
- return err;
-
- err = fscrypt_require_key(new_dir);
- if (err)
- return err;
-
- /* ... in case we looked up no-key name(s) before key was added */
- if ((old_dentry->d_flags | new_dentry->d_flags) & DCACHE_NOKEY_NAME)
+ if (fscrypt_is_nokey_name(old_dentry) ||
+ fscrypt_is_nokey_name(new_dentry))
return -ENOKEY;
+ /*
+ * We don't need to separately check that the directory inodes' keys are
+ * available, as it's implied by the dentries not being no-key names.
+ */
if (old_dir != new_dir) {
if (IS_ENCRYPTED(new_dir) &&
@@ -123,6 +114,20 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
}
EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup);
+int __fscrypt_prepare_readdir(struct inode *dir)
+{
+ return fscrypt_get_encryption_info(dir, true);
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_readdir);
+
+int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ if (attr->ia_valid & ATTR_SIZE)
+ return fscrypt_require_key(d_inode(dentry));
+ return 0;
+}
+EXPORT_SYMBOL_GPL(__fscrypt_prepare_setattr);
+
/**
* fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS
* @inode: the inode on which flags are being changed
@@ -138,6 +143,7 @@ int fscrypt_prepare_setflags(struct inode *inode,
unsigned int oldflags, unsigned int flags)
{
struct fscrypt_info *ci;
+ struct key *key;
struct fscrypt_master_key *mk;
int err;
@@ -153,13 +159,14 @@ int fscrypt_prepare_setflags(struct inode *inode,
ci = inode->i_crypt_info;
if (ci->ci_policy.version != FSCRYPT_POLICY_V2)
return -EINVAL;
- mk = ci->ci_master_key->payload.data[0];
- down_read(&mk->mk_secret_sem);
+ key = ci->ci_master_key;
+ mk = key->payload.data[0];
+ down_read(&key->sem);
if (is_master_key_secret_present(&mk->mk_secret))
err = fscrypt_derive_dirhash_key(ci, mk);
else
err = -ENOKEY;
- up_read(&mk->mk_secret_sem);
+ up_read(&key->sem);
return err;
}
return 0;
@@ -325,7 +332,7 @@ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr,
* Try to set up the symlink's encryption key, but we can continue
* regardless of whether the key is available or not.
*/
- err = fscrypt_get_encryption_info(inode);
+ err = fscrypt_get_encryption_info(inode, false);
if (err)
return ERR_PTR(err);
has_key = fscrypt_has_encryption_key(inode);
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 53cc552a7b8f..0b3ffbb4faf4 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -44,7 +44,7 @@ static void free_master_key(struct fscrypt_master_key *mk)
wipe_master_key_secret(&mk->mk_secret);
- for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) {
+ for (i = 0; i <= FSCRYPT_MODE_MAX; i++) {
fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]);
fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]);
fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_32_keys[i]);
@@ -347,7 +347,6 @@ static int add_new_master_key(struct fscrypt_master_key_secret *secret,
mk->mk_spec = *mk_spec;
move_master_key_secret(&mk->mk_secret, secret);
- init_rwsem(&mk->mk_secret_sem);
refcount_set(&mk->mk_refcount, 1); /* secret is present */
INIT_LIST_HEAD(&mk->mk_decrypted_inodes);
@@ -427,11 +426,8 @@ static int add_existing_master_key(struct fscrypt_master_key *mk,
}
/* Re-add the secret if needed. */
- if (rekey) {
- down_write(&mk->mk_secret_sem);
+ if (rekey)
move_master_key_secret(&mk->mk_secret, secret);
- up_write(&mk->mk_secret_sem);
- }
return 0;
}
@@ -975,10 +971,8 @@ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users)
/* No user claims remaining. Go ahead and wipe the secret. */
dead = false;
if (is_master_key_secret_present(&mk->mk_secret)) {
- down_write(&mk->mk_secret_sem);
wipe_master_key_secret(&mk->mk_secret);
dead = refcount_dec_and_test(&mk->mk_refcount);
- up_write(&mk->mk_secret_sem);
}
up_write(&key->sem);
if (dead) {
diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c
index d595abb8ef90..261293fb7097 100644
--- a/fs/crypto/keysetup.c
+++ b/fs/crypto/keysetup.c
@@ -56,6 +56,8 @@ static struct fscrypt_mode *
select_encryption_mode(const union fscrypt_policy *policy,
const struct inode *inode)
{
+ BUILD_BUG_ON(ARRAY_SIZE(fscrypt_modes) != FSCRYPT_MODE_MAX + 1);
+
if (S_ISREG(inode->i_mode))
return &fscrypt_modes[fscrypt_policy_contents_mode(policy)];
@@ -168,7 +170,7 @@ static int setup_per_mode_enc_key(struct fscrypt_info *ci,
unsigned int hkdf_infolen = 0;
int err;
- if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX))
+ if (WARN_ON(mode_num > FSCRYPT_MODE_MAX))
return -EINVAL;
prep_key = &keys[mode_num];
@@ -335,11 +337,11 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci,
* Find the master key, then set up the inode's actual encryption key.
*
* If the master key is found in the filesystem-level keyring, then the
- * corresponding 'struct key' is returned in *master_key_ret with
- * ->mk_secret_sem read-locked. This is needed to ensure that only one task
- * links the fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race
- * to create an fscrypt_info for the same inode), and to synchronize the master
- * key being removed with a new inode starting to use it.
+ * corresponding 'struct key' is returned in *master_key_ret with its semaphore
+ * read-locked. This is needed to ensure that only one task links the
+ * fscrypt_info into ->mk_decrypted_inodes (as multiple tasks may race to create
+ * an fscrypt_info for the same inode), and to synchronize the master key being
+ * removed with a new inode starting to use it.
*/
static int setup_file_encryption_key(struct fscrypt_info *ci,
bool need_dirhash_key,
@@ -388,7 +390,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
}
mk = key->payload.data[0];
- down_read(&mk->mk_secret_sem);
+ down_read(&key->sem);
/* Has the secret been removed (via FS_IOC_REMOVE_ENCRYPTION_KEY)? */
if (!is_master_key_secret_present(&mk->mk_secret)) {
@@ -431,7 +433,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci,
return 0;
out_release_key:
- up_read(&mk->mk_secret_sem);
+ up_read(&key->sem);
key_put(key);
return err;
}
@@ -534,9 +536,7 @@ fscrypt_setup_encryption_info(struct inode *inode,
res = 0;
out:
if (master_key) {
- struct fscrypt_master_key *mk = master_key->payload.data[0];
-
- up_read(&mk->mk_secret_sem);
+ up_read(&master_key->sem);
key_put(master_key);
}
put_crypt_info(crypt_info);
@@ -546,6 +546,11 @@ out:
/**
* fscrypt_get_encryption_info() - set up an inode's encryption key
* @inode: the inode to set up the key for. Must be encrypted.
+ * @allow_unsupported: if %true, treat an unsupported encryption policy (or
+ * unrecognized encryption context) the same way as the key
+ * being unavailable, instead of returning an error. Use
+ * %false unless the operation being performed is needed in
+ * order for files (or directories) to be deleted.
*
* Set up ->i_crypt_info, if it hasn't already been done.
*
@@ -556,7 +561,7 @@ out:
* encryption key is unavailable. (Use fscrypt_has_encryption_key() to
* distinguish these cases.) Also can return another -errno code.
*/
-int fscrypt_get_encryption_info(struct inode *inode)
+int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported)
{
int res;
union fscrypt_context ctx;
@@ -567,29 +572,38 @@ int fscrypt_get_encryption_info(struct inode *inode)
res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
if (res < 0) {
+ if (res == -ERANGE && allow_unsupported)
+ return 0;
fscrypt_warn(inode, "Error %d getting encryption context", res);
return res;
}
res = fscrypt_policy_from_context(&policy, &ctx, res);
if (res) {
+ if (allow_unsupported)
+ return 0;
fscrypt_warn(inode,
"Unrecognized or corrupt encryption context");
return res;
}
- if (!fscrypt_supported_policy(&policy, inode))
+ if (!fscrypt_supported_policy(&policy, inode)) {
+ if (allow_unsupported)
+ return 0;
return -EINVAL;
+ }
res = fscrypt_setup_encryption_info(inode, &policy,
fscrypt_context_nonce(&ctx),
IS_CASEFOLDED(inode) &&
S_ISDIR(inode->i_mode));
+
+ if (res == -ENOPKG && allow_unsupported) /* Algorithm unavailable? */
+ res = 0;
if (res == -ENOKEY)
res = 0;
return res;
}
-EXPORT_SYMBOL(fscrypt_get_encryption_info);
/**
* fscrypt_prepare_new_inode() - prepare to create a new inode in a directory
@@ -710,7 +724,7 @@ int fscrypt_drop_inode(struct inode *inode)
return 0;
/*
- * Note: since we aren't holding ->mk_secret_sem, the result here can
+ * Note: since we aren't holding the key semaphore, the result here can
* immediately become outdated. But there's no correctness problem with
* unnecessarily evicting. Nor is there a correctness problem with not
* evicting while iput() is racing with the key being removed, since
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
index 4441d9944b9e..a51cef6bd27f 100644
--- a/fs/crypto/policy.c
+++ b/fs/crypto/policy.c
@@ -175,7 +175,10 @@ static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy,
return false;
}
- if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) {
+ if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK |
+ FSCRYPT_POLICY_FLAG_DIRECT_KEY |
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 |
+ FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) {
fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)",
policy->flags);
return false;
@@ -587,7 +590,7 @@ EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_nonce);
int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
{
union fscrypt_policy parent_policy, child_policy;
- int err;
+ int err, err1, err2;
/* No restrictions on file types which are never encrypted */
if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
@@ -617,19 +620,25 @@ int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
* In any case, if an unexpected error occurs, fall back to "forbidden".
*/
- err = fscrypt_get_encryption_info(parent);
+ err = fscrypt_get_encryption_info(parent, true);
if (err)
return 0;
- err = fscrypt_get_encryption_info(child);
+ err = fscrypt_get_encryption_info(child, true);
if (err)
return 0;
- err = fscrypt_get_policy(parent, &parent_policy);
- if (err)
- return 0;
+ err1 = fscrypt_get_policy(parent, &parent_policy);
+ err2 = fscrypt_get_policy(child, &child_policy);
- err = fscrypt_get_policy(child, &child_policy);
- if (err)
+ /*
+ * Allow the case where the parent and child both have an unrecognized
+ * encryption policy, so that files with an unrecognized encryption
+ * policy can be deleted.
+ */
+ if (err1 == -EINVAL && err2 == -EINVAL)
+ return 1;
+
+ if (err1 || err2)
return 0;
return fscrypt_policies_equal(&parent_policy, &child_policy);
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 96c0c86f3fff..0297ad95eb5c 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -7,6 +7,7 @@
#include <linux/efi.h>
#include <linux/fs.h>
#include <linux/ctype.h>
+#include <linux/kmemleak.h>
#include <linux/slab.h>
#include <linux/uuid.h>
@@ -103,6 +104,7 @@ static int efivarfs_create(struct inode *dir, struct dentry *dentry,
var->var.VariableName[i] = '\0';
inode->i_private = var;
+ kmemleak_ignore(var);
err = efivar_entry_add(var, &efivarfs_list);
if (err)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ca50c90adc4c..c6d16353326a 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -118,11 +118,9 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
struct buffer_head *bh = NULL;
struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
- if (IS_ENCRYPTED(inode)) {
- err = fscrypt_get_encryption_info(inode);
- if (err)
- return err;
- }
+ err = fscrypt_prepare_readdir(inode);
+ if (err)
+ return err;
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
@@ -616,13 +614,6 @@ finished:
return 0;
}
-static int ext4_dir_open(struct inode * inode, struct file * filp)
-{
- if (IS_ENCRYPTED(inode))
- return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
- return 0;
-}
-
static int ext4_release_dir(struct inode *inode, struct file *filp)
{
if (filp->private_data)
@@ -664,7 +655,6 @@ const struct file_operations ext4_dir_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.fsync = ext4_sync_file,
- .open = ext4_dir_open,
.release = ext4_release_dir,
};
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf9429484462..65ecaf96d0a4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2695,7 +2695,8 @@ void ext4_insert_dentry(struct inode *inode,
struct ext4_filename *fname);
static inline void ext4_update_dx_flag(struct inode *inode)
{
- if (!ext4_has_feature_dir_index(inode->i_sb)) {
+ if (!ext4_has_feature_dir_index(inode->i_sb) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
/* ext4_iget() should have caught this... */
WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb));
ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 33509266f5a0..5fa8436cd5fa 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -643,13 +643,7 @@ static struct stats dx_show_leaf(struct inode *dir,
name = de->name;
len = de->name_len;
- if (IS_ENCRYPTED(dir))
- res = fscrypt_get_encryption_info(dir);
- if (res) {
- printk(KERN_WARNING "Error setting up"
- " fname crypto: %d\n", res);
- }
- if (!fscrypt_has_encryption_key(dir)) {
+ if (!IS_ENCRYPTED(dir)) {
/* Directory is not encrypted */
ext4fs_dirhash(dir, de->name,
de->name_len, &h);
@@ -1010,7 +1004,7 @@ static int htree_dirblock_to_tree(struct file *dir_file,
EXT4_DIR_REC_LEN(0));
/* Check if the directory is encrypted */
if (IS_ENCRYPTED(dir)) {
- err = fscrypt_get_encryption_info(dir);
+ err = fscrypt_prepare_readdir(dir);
if (err < 0) {
brelse(bh);
return err;
@@ -2195,6 +2189,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (!dentry->d_name.len)
return -EINVAL;
+ if (fscrypt_is_nokey_name(dentry))
+ return -ENOKEY;
+
#ifdef CONFIG_UNICODE
if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6633b20224d5..94472044f4c1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2638,10 +2638,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
} else if (test_opt2(sb, DAX_INODE)) {
SEQ_OPTS_PUTS("dax=inode");
}
-
- if (test_opt2(sb, JOURNAL_FAST_COMMIT))
- SEQ_OPTS_PUTS("fast_commit");
-
ext4_show_quota_options(seq, sb);
return 0;
}
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a..049500f1e764 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1022,7 +1022,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
int err = 0;
if (IS_ENCRYPTED(inode)) {
- err = fscrypt_get_encryption_info(inode);
+ err = fscrypt_prepare_readdir(inode);
if (err)
goto out;
@@ -1081,19 +1081,11 @@ out:
return err < 0 ? err : 0;
}
-static int f2fs_dir_open(struct inode *inode, struct file *filp)
-{
- if (IS_ENCRYPTED(inode))
- return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
- return 0;
-}
-
const struct file_operations f2fs_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = f2fs_readdir,
.fsync = f2fs_sync_file,
- .open = f2fs_dir_open,
.unlocked_ioctl = f2fs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = f2fs_compat_ioctl,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cb700d797296..9a321c52face 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3251,6 +3251,8 @@ bool f2fs_empty_dir(struct inode *dir);
static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
{
+ if (fscrypt_is_nokey_name(dentry))
+ return -ENOKEY;
return f2fs_do_add_link(d_inode(dentry->d_parent), &dentry->d_name,
inode, inode->i_ino, inode->i_mode);
}
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d98a2e5dab9f..35a6fd103761 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1035,6 +1035,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_node.next = NULL;
gl->gl_flags = 0;
gl->gl_name = name;
+ lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
gl->gl_lockref.count = 1;
gl->gl_state = LM_ST_UNLOCKED;
gl->gl_target = LM_ST_UNLOCKED;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 6c1432d78dce..3faa421568b0 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -245,7 +245,7 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
const char *fs_id_buf)
{
- struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
+ struct gfs2_rgrpd *rgd = gl->gl_object;
if (rgd)
gfs2_rgrp_dump(seq, rgd, fs_id_buf);
@@ -571,7 +571,19 @@ static int freeze_go_sync(struct gfs2_glock *gl)
int error = 0;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- if (gl->gl_req == LM_ST_EXCLUSIVE && !gfs2_withdrawn(sdp)) {
+ /*
+ * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
+ * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
+ * all the nodes should have the freeze glock in SH mode and they all
+ * call do_xmote: One for EX and the others for UN. They ALL must
+ * freeze locally, and they ALL must queue freeze work. The freeze_work
+ * calls freeze_func, which tries to reacquire the freeze glock in SH,
+ * effectively waiting for the thaw on the node who holds it in EX.
+ * Once thawed, the work func acquires the freeze glock in
+ * SH and everybody goes back to thawed.
+ */
+ if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
+ !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
@@ -770,6 +782,7 @@ const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_callback = iopen_go_callback,
.go_demote_ok = iopen_go_demote_ok,
.go_flags = GLOF_LRU | GLOF_NONDISK,
+ .go_subclass = 1,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d7707307f4b1..f8858d995b24 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -247,6 +247,7 @@ struct gfs2_glock_operations {
const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
void (*go_free)(struct gfs2_glock *gl);
+ const int go_subclass;
const int go_type;
const unsigned long go_flags;
#define GLOF_ASPACE 1 /* address space attached */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 077ccb1b3ccc..65ae4fc28ede 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -150,6 +150,8 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
goto fail;
+ if (blktype != GFS2_BLKST_UNLINKED)
+ gfs2_cancel_delete_work(io_gl);
if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
/*
@@ -180,8 +182,6 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
goto fail;
- if (blktype != GFS2_BLKST_UNLINKED)
- gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
@@ -725,13 +725,19 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
flush_delayed_work(&ip->i_gl->gl_work);
glock_set_object(ip->i_gl, ip);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+ error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
goto fail_free_inode;
+ gfs2_cancel_delete_work(io_gl);
+ glock_set_object(io_gl, ip);
+
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+ if (error)
+ goto fail_gunlock2;
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
- goto fail_free_inode;
+ goto fail_gunlock2;
if (blocks > 1) {
ip->i_eattr = ip->i_no_addr + 1;
@@ -740,18 +746,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
- error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
- if (error)
- goto fail_free_inode;
-
BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
- gfs2_cancel_delete_work(ip->i_iopen_gh.gh_gl);
- glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_set_iop(inode);
insert_inode_hash(inode);
@@ -803,6 +803,7 @@ fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
+ glock_clear_object(io_gl, ip);
gfs2_glock_put(io_gl);
fail_free_inode:
if (ip->i_gl) {
@@ -2116,6 +2117,25 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
}
+static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
+ int flags)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_glock *gl = ip->i_gl;
+ struct gfs2_holder *gh;
+ int error;
+
+ gh = gfs2_glock_is_locked_by_me(gl);
+ if (gh && !gfs2_glock_is_held_excl(gl)) {
+ gfs2_glock_dq(gh);
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, gh);
+ error = gfs2_glock_nq(gh);
+ if (error)
+ return error;
+ }
+ return generic_update_time(inode, time, flags);
+}
+
const struct inode_operations gfs2_file_iops = {
.permission = gfs2_permission,
.setattr = gfs2_setattr,
@@ -2124,6 +2144,7 @@ const struct inode_operations gfs2_file_iops = {
.fiemap = gfs2_fiemap,
.get_acl = gfs2_get_acl,
.set_acl = gfs2_set_acl,
+ .update_time = gfs2_update_time,
};
const struct inode_operations gfs2_dir_iops = {
@@ -2143,6 +2164,7 @@ const struct inode_operations gfs2_dir_iops = {
.fiemap = gfs2_fiemap,
.get_acl = gfs2_get_acl,
.set_acl = gfs2_set_acl,
+ .update_time = gfs2_update_time,
.atomic_open = gfs2_atomic_open,
};
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index f7addc6197ed..5e8eef9990e3 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -985,6 +985,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
if (error < 0)
return error;
+ if (RB_EMPTY_ROOT(&sdp->sd_rindex_tree)) {
+ fs_err(sdp, "no resource groups found in the file system.\n");
+ return -ENOENT;
+ }
set_rgrp_preferences(sdp);
sdp->sd_rindex_uptodate = 1;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ead291b2976..86dac2b2e276 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -205,6 +205,7 @@ struct fixed_file_ref_node {
struct list_head file_list;
struct fixed_file_data *file_data;
struct llist_node llist;
+ bool done;
};
struct fixed_file_data {
@@ -478,6 +479,7 @@ struct io_sr_msg {
struct io_open {
struct file *file;
int dfd;
+ bool ignore_nonblock;
struct filename *filename;
struct open_how how;
unsigned long nofile;
@@ -1282,7 +1284,7 @@ static bool io_identity_cow(struct io_kiocb *req)
*/
io_init_identity(id);
if (creds)
- req->work.identity->creds = creds;
+ id->creds = creds;
/* add one for this request */
refcount_inc(&id->count);
@@ -1311,22 +1313,6 @@ static bool io_grab_identity(struct io_kiocb *req)
return false;
req->work.flags |= IO_WQ_WORK_FSIZE;
}
-
- if (!(req->work.flags & IO_WQ_WORK_FILES) &&
- (def->work_flags & IO_WQ_WORK_FILES) &&
- !(req->flags & REQ_F_NO_FILE_TABLE)) {
- if (id->files != current->files ||
- id->nsproxy != current->nsproxy)
- return false;
- atomic_inc(&id->files->count);
- get_nsproxy(id->nsproxy);
- req->flags |= REQ_F_INFLIGHT;
-
- spin_lock_irq(&ctx->inflight_lock);
- list_add(&req->inflight_entry, &ctx->inflight_list);
- spin_unlock_irq(&ctx->inflight_lock);
- req->work.flags |= IO_WQ_WORK_FILES;
- }
#ifdef CONFIG_BLK_CGROUP
if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
(def->work_flags & IO_WQ_WORK_BLKCG)) {
@@ -1368,6 +1354,21 @@ static bool io_grab_identity(struct io_kiocb *req)
}
spin_unlock(&current->fs->lock);
}
+ if (!(req->work.flags & IO_WQ_WORK_FILES) &&
+ (def->work_flags & IO_WQ_WORK_FILES) &&
+ !(req->flags & REQ_F_NO_FILE_TABLE)) {
+ if (id->files != current->files ||
+ id->nsproxy != current->nsproxy)
+ return false;
+ atomic_inc(&id->files->count);
+ get_nsproxy(id->nsproxy);
+ req->flags |= REQ_F_INFLIGHT;
+
+ spin_lock_irq(&ctx->inflight_lock);
+ list_add(&req->inflight_entry, &ctx->inflight_list);
+ spin_unlock_irq(&ctx->inflight_lock);
+ req->work.flags |= IO_WQ_WORK_FILES;
+ }
return true;
}
@@ -2577,7 +2578,6 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error)
}
end_req:
req_set_fail_links(req);
- io_req_complete(req, ret);
return false;
}
#endif
@@ -3192,7 +3192,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
rw->free_iovec = iovec;
rw->bytes_done = 0;
/* can only be fixed buffers, no need to do anything */
- if (iter->type == ITER_BVEC)
+ if (iov_iter_is_bvec(iter))
return;
if (!iovec) {
unsigned iov_off = 0;
@@ -3795,6 +3795,7 @@ static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
return ret;
}
req->open.nofile = rlimit(RLIMIT_NOFILE);
+ req->open.ignore_nonblock = false;
req->flags |= REQ_F_NEED_CLEANUP;
return 0;
}
@@ -3838,7 +3839,7 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
struct file *file;
int ret;
- if (force_nonblock)
+ if (force_nonblock && !req->open.ignore_nonblock)
return -EAGAIN;
ret = build_open_flags(&req->open.how, &op);
@@ -3853,6 +3854,21 @@ static int io_openat2(struct io_kiocb *req, bool force_nonblock)
if (IS_ERR(file)) {
put_unused_fd(ret);
ret = PTR_ERR(file);
+ /*
+ * A work-around to ensure that /proc/self works that way
+ * that it should - if we get -EOPNOTSUPP back, then assume
+ * that proc_self_get_link() failed us because we're in async
+ * context. We should be safe to retry this from the task
+ * itself with force_nonblock == false set, as it should not
+ * block on lookup. Would be nice to know this upfront and
+ * avoid the async dance, but doesn't seem feasible.
+ */
+ if (ret == -EOPNOTSUPP && io_wq_current_is_worker()) {
+ req->open.ignore_nonblock = true;
+ refcount_inc(&req->refs);
+ io_req_task_queue(req);
+ return 0;
+ }
} else {
fsnotify_open(file);
fd_install(ret, file);
@@ -4483,7 +4499,8 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
return -EFAULT;
if (clen < 0)
return -EINVAL;
- sr->len = iomsg->iov[0].iov_len;
+ sr->len = clen;
+ iomsg->iov[0].iov_len = clen;
iomsg->iov = NULL;
} else {
ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
@@ -6957,9 +6974,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return -ENXIO;
spin_lock(&data->lock);
- if (!list_empty(&data->ref_list))
- ref_node = list_first_entry(&data->ref_list,
- struct fixed_file_ref_node, node);
+ ref_node = data->node;
spin_unlock(&data->lock);
if (ref_node)
percpu_ref_kill(&ref_node->refs);
@@ -7308,10 +7323,6 @@ static void __io_file_put_work(struct fixed_file_ref_node *ref_node)
kfree(pfile);
}
- spin_lock(&file_data->lock);
- list_del(&ref_node->node);
- spin_unlock(&file_data->lock);
-
percpu_ref_exit(&ref_node->refs);
kfree(ref_node);
percpu_ref_put(&file_data->refs);
@@ -7338,17 +7349,32 @@ static void io_file_put_work(struct work_struct *work)
static void io_file_data_ref_zero(struct percpu_ref *ref)
{
struct fixed_file_ref_node *ref_node;
+ struct fixed_file_data *data;
struct io_ring_ctx *ctx;
- bool first_add;
+ bool first_add = false;
int delay = HZ;
ref_node = container_of(ref, struct fixed_file_ref_node, refs);
- ctx = ref_node->file_data->ctx;
+ data = ref_node->file_data;
+ ctx = data->ctx;
+
+ spin_lock(&data->lock);
+ ref_node->done = true;
- if (percpu_ref_is_dying(&ctx->file_data->refs))
+ while (!list_empty(&data->ref_list)) {
+ ref_node = list_first_entry(&data->ref_list,
+ struct fixed_file_ref_node, node);
+ /* recycle ref nodes in order */
+ if (!ref_node->done)
+ break;
+ list_del(&ref_node->node);
+ first_add |= llist_add(&ref_node->llist, &ctx->file_put_llist);
+ }
+ spin_unlock(&data->lock);
+
+ if (percpu_ref_is_dying(&data->refs))
delay = 0;
- first_add = llist_add(&ref_node->llist, &ctx->file_put_llist);
if (!delay)
mod_delayed_work(system_wq, &ctx->file_put_work, 0);
else if (first_add)
@@ -7372,6 +7398,7 @@ static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
INIT_LIST_HEAD(&ref_node->node);
INIT_LIST_HEAD(&ref_node->file_list);
ref_node->file_data = ctx->file_data;
+ ref_node->done = false;
return ref_node;
}
@@ -7467,7 +7494,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
file_data->node = ref_node;
spin_lock(&file_data->lock);
- list_add(&ref_node->node, &file_data->ref_list);
+ list_add_tail(&ref_node->node, &file_data->ref_list);
spin_unlock(&file_data->lock);
percpu_ref_get(&file_data->refs);
return ret;
@@ -7626,7 +7653,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
if (needs_switch) {
percpu_ref_kill(&data->node->refs);
spin_lock(&data->lock);
- list_add(&ref_node->node, &data->ref_list);
+ list_add_tail(&ref_node->node, &data->ref_list);
data->node = ref_node;
spin_unlock(&data->lock);
percpu_ref_get(&ctx->file_data->refs);
@@ -9156,6 +9183,7 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
{
struct file *file;
int ret;
+ int fd;
#if defined(CONFIG_UNIX)
ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP,
@@ -9167,12 +9195,12 @@ static int io_uring_get_fd(struct io_ring_ctx *ctx)
ret = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
if (ret < 0)
goto err;
+ fd = ret;
file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx,
O_RDWR | O_CLOEXEC);
if (IS_ERR(file)) {
-err_fd:
- put_unused_fd(ret);
+ put_unused_fd(fd);
ret = PTR_ERR(file);
goto err;
}
@@ -9180,12 +9208,14 @@ err_fd:
#if defined(CONFIG_UNIX)
ctx->ring_sock->file = file;
#endif
- if (unlikely(io_uring_add_task_file(ctx, file))) {
- file = ERR_PTR(-ENOMEM);
- goto err_fd;
+ ret = io_uring_add_task_file(ctx, file);
+ if (ret) {
+ fput(file);
+ put_unused_fd(fd);
+ goto err;
}
- fd_install(ret, file);
- return ret;
+ fd_install(fd, file);
+ return fd;
err:
#if defined(CONFIG_UNIX)
sock_release(ctx->ring_sock);
@@ -9225,14 +9255,16 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
* to a power-of-two, if it isn't already. We do NOT impose
* any cq vs sq ring sizing.
*/
- p->cq_entries = roundup_pow_of_two(p->cq_entries);
- if (p->cq_entries < p->sq_entries)
+ if (!p->cq_entries)
return -EINVAL;
if (p->cq_entries > IORING_MAX_CQ_ENTRIES) {
if (!(p->flags & IORING_SETUP_CLAMP))
return -EINVAL;
p->cq_entries = IORING_MAX_CQ_ENTRIES;
}
+ p->cq_entries = roundup_pow_of_two(p->cq_entries);
+ if (p->cq_entries < p->sq_entries)
+ return -EINVAL;
} else {
p->cq_entries = 2 * p->sq_entries;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0c3d5e3b24b2..188f79d76988 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -566,12 +566,14 @@ static int __jbd2_journal_force_commit(journal_t *journal)
}
/**
- * Force and wait upon a commit if the calling process is not within
- * transaction. This is used for forcing out undo-protected data which contains
- * bitmaps, when the fs is running out of space.
+ * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
+ * calling process is not within transaction.
*
* @journal: journal to force
* Returns true if progress was made.
+ *
+ * This is used for forcing out undo-protected data which contains
+ * bitmaps, when the fs is running out of space.
*/
int jbd2_journal_force_commit_nested(journal_t *journal)
{
@@ -582,7 +584,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
}
/**
- * int journal_force_commit() - force any uncommitted transactions
+ * jbd2_journal_force_commit() - force any uncommitted transactions
* @journal: journal to force
*
* Caller want unconditional commit. We can only force the running transaction
@@ -1881,7 +1883,7 @@ static int load_superblock(journal_t *journal)
/**
- * int jbd2_journal_load() - Read journal from disk.
+ * jbd2_journal_load() - Read journal from disk.
* @journal: Journal to act on.
*
* Given a journal_t structure which tells us which disk blocks contain
@@ -1951,7 +1953,7 @@ recovery_error:
}
/**
- * void jbd2_journal_destroy() - Release a journal_t structure.
+ * jbd2_journal_destroy() - Release a journal_t structure.
* @journal: Journal to act on.
*
* Release a journal_t structure once it is no longer in use by the
@@ -2028,7 +2030,7 @@ int jbd2_journal_destroy(journal_t *journal)
/**
- *int jbd2_journal_check_used_features() - Check if features specified are used.
+ * jbd2_journal_check_used_features() - Check if features specified are used.
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2063,7 +2065,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
}
/**
- * int jbd2_journal_check_available_features() - Check feature set in journalling layer
+ * jbd2_journal_check_available_features() - Check feature set in journalling layer
* @journal: Journal to check.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2126,7 +2128,7 @@ jbd2_journal_initialize_fast_commit(journal_t *journal)
}
/**
- * int jbd2_journal_set_features() - Mark a given journal feature in the superblock
+ * jbd2_journal_set_features() - Mark a given journal feature in the superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
* @ro: bitmask of features that force read-only mount
@@ -2217,7 +2219,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
}
/*
- * jbd2_journal_clear_features () - Clear a given journal feature in the
+ * jbd2_journal_clear_features() - Clear a given journal feature in the
* superblock
* @journal: Journal to act on.
* @compat: bitmask of compatible features
@@ -2246,7 +2248,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
EXPORT_SYMBOL(jbd2_journal_clear_features);
/**
- * int jbd2_journal_flush () - Flush journal
+ * jbd2_journal_flush() - Flush journal
* @journal: Journal to act on.
*
* Flush all data for a given journal to disk and empty the journal.
@@ -2321,7 +2323,7 @@ out:
}
/**
- * int jbd2_journal_wipe() - Wipe journal contents
+ * jbd2_journal_wipe() - Wipe journal contents
* @journal: Journal to act on.
* @write: flag (see below)
*
@@ -2362,7 +2364,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
}
/**
- * void jbd2_journal_abort () - Shutdown the journal immediately.
+ * jbd2_journal_abort () - Shutdown the journal immediately.
* @journal: the journal to shutdown.
* @errno: an error number to record in the journal indicating
* the reason for the shutdown.
@@ -2453,7 +2455,7 @@ void jbd2_journal_abort(journal_t *journal, int errno)
}
/**
- * int jbd2_journal_errno () - returns the journal's error state.
+ * jbd2_journal_errno() - returns the journal's error state.
* @journal: journal to examine.
*
* This is the errno number set with jbd2_journal_abort(), the last
@@ -2477,7 +2479,7 @@ int jbd2_journal_errno(journal_t *journal)
}
/**
- * int jbd2_journal_clear_err () - clears the journal's error state
+ * jbd2_journal_clear_err() - clears the journal's error state
* @journal: journal to act on.
*
* An error must be cleared or acked to take a FS out of readonly
@@ -2497,7 +2499,7 @@ int jbd2_journal_clear_err(journal_t *journal)
}
/**
- * void jbd2_journal_ack_err() - Ack journal err.
+ * jbd2_journal_ack_err() - Ack journal err.
* @journal: journal to act on.
*
* An error must be cleared or acked to take a FS out of readonly
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index d54f04674e8e..9396666b7314 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(jbd2__journal_start);
/**
- * handle_t *jbd2_journal_start() - Obtain a new handle.
+ * jbd2_journal_start() - Obtain a new handle.
* @journal: Journal to start transaction on.
* @nblocks: number of block buffer we might modify
*
@@ -566,7 +566,7 @@ void jbd2_journal_free_reserved(handle_t *handle)
EXPORT_SYMBOL(jbd2_journal_free_reserved);
/**
- * int jbd2_journal_start_reserved() - start reserved handle
+ * jbd2_journal_start_reserved() - start reserved handle
* @handle: handle to start
* @type: for handle statistics
* @line_no: for handle statistics
@@ -620,7 +620,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type,
EXPORT_SYMBOL(jbd2_journal_start_reserved);
/**
- * int jbd2_journal_extend() - extend buffer credits.
+ * jbd2_journal_extend() - extend buffer credits.
* @handle: handle to 'extend'
* @nblocks: nr blocks to try to extend by.
* @revoke_records: number of revoke records to try to extend by.
@@ -745,7 +745,7 @@ static void stop_this_handle(handle_t *handle)
}
/**
- * int jbd2_journal_restart() - restart a handle .
+ * jbd2__journal_restart() - restart a handle .
* @handle: handle to restart
* @nblocks: nr credits requested
* @revoke_records: number of revoke record credits requested
@@ -815,7 +815,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
EXPORT_SYMBOL(jbd2_journal_restart);
/**
- * void jbd2_journal_lock_updates () - establish a transaction barrier.
+ * jbd2_journal_lock_updates () - establish a transaction barrier.
* @journal: Journal to establish a barrier on.
*
* This locks out any further updates from being started, and blocks
@@ -874,7 +874,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
}
/**
- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
+ * jbd2_journal_unlock_updates () - release barrier
* @journal: Journal to release the barrier on.
*
* Release a transaction barrier obtained with jbd2_journal_lock_updates().
@@ -1182,7 +1182,8 @@ out:
}
/**
- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * jbd2_journal_get_write_access() - notify intent to modify a buffer
+ * for metadata (not data) update.
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
*
@@ -1226,7 +1227,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
* unlocked buffer beforehand. */
/**
- * int jbd2_journal_get_create_access () - notify intent to use newly created bh
+ * jbd2_journal_get_create_access () - notify intent to use newly created bh
* @handle: transaction to new buffer to
* @bh: new buffer.
*
@@ -1306,7 +1307,7 @@ out:
}
/**
- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
+ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with
* non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
@@ -1383,7 +1384,7 @@ out:
}
/**
- * void jbd2_journal_set_triggers() - Add triggers for commit writeout
+ * jbd2_journal_set_triggers() - Add triggers for commit writeout
* @bh: buffer to trigger on
* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
*
@@ -1425,7 +1426,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
}
/**
- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
+ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
* @handle: transaction to add buffer to.
* @bh: buffer to mark
*
@@ -1593,7 +1594,7 @@ out:
}
/**
- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
+ * jbd2_journal_forget() - bforget() for potentially-journaled buffers.
* @handle: transaction handle
* @bh: bh to 'forget'
*
@@ -1762,7 +1763,7 @@ drop:
}
/**
- * int jbd2_journal_stop() - complete a transaction
+ * jbd2_journal_stop() - complete a transaction
* @handle: transaction to complete.
*
* All done for a particular handle.
@@ -2080,7 +2081,7 @@ out:
}
/**
- * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
+ * jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
* @page: to try and free
*
@@ -2411,7 +2412,7 @@ zap_buffer_unlocked:
}
/**
- * void jbd2_journal_invalidatepage()
+ * jbd2_journal_invalidatepage()
* @journal: journal to use for flush...
* @page: page to flush
* @offset: start of the range to invalidate
diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..7124c2e8df2f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -959,7 +959,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
struct simple_attr *attr;
- u64 val;
+ unsigned long long val;
size_t size;
ssize_t ret;
@@ -977,7 +977,9 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
goto out;
attr->set_buf[size] = '\0';
- val = simple_strtoll(attr->set_buf, NULL, 0);
+ ret = kstrtoull(attr->set_buf, 0, &val);
+ if (ret)
+ goto out;
ret = attr->set(attr->data, val);
if (ret == 0)
ret = len; /* on success, claim we got the whole input */
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 88e1763e02f3..e2a488d403a6 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -205,3 +205,12 @@ config NFS_DISABLE_UDP_SUPPORT
Choose Y here to disable the use of NFS over UDP. NFS over UDP
on modern networks (1Gb+) can lead to data corruption caused by
fragmentation during high loads.
+
+config NFS_V4_2_READ_PLUS
+ bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation"
+ depends on NFS_V4_2
+ default n
+ help
+ This is intended for developers only. The READ_PLUS operation has
+ been shown to have issues under specific conditions and should not
+ be used in production.
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index a163533446fa..24bf5797f88a 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -838,7 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_pgio_mirror *pgm;
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
- u32 ds_idx, i;
+ u32 ds_idx;
retry:
ff_layout_pg_check_layout(pgio, req);
@@ -864,11 +864,9 @@ retry:
goto retry;
}
- for (i = 0; i < pgio->pg_mirror_count; i++) {
- mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
- pgm = &pgio->pg_mirrors[i];
- pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
- }
+ mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+ pgm = &pgio->pg_mirrors[0];
+ pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
@@ -985,6 +983,21 @@ out:
return 1;
}
+static u32
+ff_layout_pg_set_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ u32 old = desc->pg_mirror_idx;
+
+ desc->pg_mirror_idx = idx;
+ return old;
+}
+
+static struct nfs_pgio_mirror *
+ff_layout_pg_get_mirror_write(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ return &desc->pg_mirrors[idx];
+}
+
static const struct nfs_pageio_ops ff_layout_pg_read_ops = {
.pg_init = ff_layout_pg_init_read,
.pg_test = pnfs_generic_pg_test,
@@ -998,6 +1011,8 @@ static const struct nfs_pageio_ops ff_layout_pg_write_ops = {
.pg_doio = pnfs_generic_pg_writepages,
.pg_get_mirror_count = ff_layout_pg_get_mirror_count_write,
.pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_get_mirror = ff_layout_pg_get_mirror_write,
+ .pg_set_mirror = ff_layout_pg_set_mirror_write,
};
static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 2b2211d1234e..4fc61e3d098d 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1241,12 +1241,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
.rpc_resp = &res,
};
u32 xdrlen;
- int ret, np;
+ int ret, np, i;
+ ret = -ENOMEM;
res.scratch = alloc_page(GFP_KERNEL);
if (!res.scratch)
- return -ENOMEM;
+ goto out;
xdrlen = nfs42_listxattr_xdrsize(buflen);
if (xdrlen > server->lxasize)
@@ -1254,9 +1255,12 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
np = xdrlen / PAGE_SIZE + 1;
pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
- if (pages == NULL) {
- __free_page(res.scratch);
- return -ENOMEM;
+ if (!pages)
+ goto out_free_scratch;
+ for (i = 0; i < np; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i])
+ goto out_free_pages;
}
arg.xattr_pages = pages;
@@ -1271,14 +1275,15 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
*eofp = res.eof;
}
+out_free_pages:
while (--np >= 0) {
if (pages[np])
__free_page(pages[np]);
}
-
- __free_page(res.scratch);
kfree(pages);
-
+out_free_scratch:
+ __free_page(res.scratch);
+out:
return ret;
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6e060a88f98c..8432bd6b95f0 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1528,7 +1528,6 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
hdr.replen);
- req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
encode_nops(&hdr);
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 9d354de613da..57b3821d975a 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -377,10 +377,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
goto out_stateowner;
set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags);
- set_bit(NFS_OPEN_STATE, &ctx->state->flags);
memcpy(&ctx->state->open_stateid.other, &stateid->other,
NFS4_STATEID_OTHER_SIZE);
update_open_stateid(ctx->state, stateid, NULL, filep->f_mode);
+ set_bit(NFS_OPEN_STATE, &ctx->state->flags);
nfs_file_set_open_context(filep, ctx);
put_nfs_open_context(ctx);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9e0ca9b2b210..e89468678ae1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5309,7 +5309,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
nfs4_read_done_cb(task, hdr);
}
-#ifdef CONFIG_NFS_V4_2
+#if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
{
if (server->caps & NFS_CAP_READ_PLUS)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6985cacf4700..78c9c4bdef2b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -31,13 +31,29 @@
static struct kmem_cache *nfs_page_cachep;
static const struct rpc_call_ops nfs_pgio_common_ops;
+static struct nfs_pgio_mirror *
+nfs_pgio_get_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ if (desc->pg_ops->pg_get_mirror)
+ return desc->pg_ops->pg_get_mirror(desc, idx);
+ return &desc->pg_mirrors[0];
+}
+
struct nfs_pgio_mirror *
nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
{
- return &desc->pg_mirrors[desc->pg_mirror_idx];
+ return nfs_pgio_get_mirror(desc, desc->pg_mirror_idx);
}
EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror);
+static u32
+nfs_pgio_set_current_mirror(struct nfs_pageio_descriptor *desc, u32 idx)
+{
+ if (desc->pg_ops->pg_set_mirror)
+ return desc->pg_ops->pg_set_mirror(desc, idx);
+ return desc->pg_mirror_idx;
+}
+
void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr))
@@ -1259,7 +1275,7 @@ static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
return;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
+ mirror = nfs_pgio_get_mirror(desc, midx);
desc->pg_completion_ops->error_cleanup(&mirror->pg_list,
desc->pg_error);
}
@@ -1293,12 +1309,12 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
goto out_failed;
}
- desc->pg_mirror_idx = midx;
+ nfs_pgio_set_current_mirror(desc, midx);
if (!nfs_pageio_add_request_mirror(desc, dupreq))
goto out_cleanup_subreq;
}
- desc->pg_mirror_idx = 0;
+ nfs_pgio_set_current_mirror(desc, 0);
if (!nfs_pageio_add_request_mirror(desc, req))
goto out_failed;
@@ -1320,10 +1336,12 @@ out_failed:
static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
u32 mirror_idx)
{
- struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx];
- u32 restore_idx = desc->pg_mirror_idx;
+ struct nfs_pgio_mirror *mirror;
+ u32 restore_idx;
+
+ restore_idx = nfs_pgio_set_current_mirror(desc, mirror_idx);
+ mirror = nfs_pgio_current_mirror(desc);
- desc->pg_mirror_idx = mirror_idx;
for (;;) {
nfs_pageio_doio(desc);
if (desc->pg_error < 0 || !mirror->pg_recoalesce)
@@ -1331,7 +1349,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc,
if (!nfs_do_recoalesce(desc))
break;
}
- desc->pg_mirror_idx = restore_idx;
+ nfs_pgio_set_current_mirror(desc, restore_idx);
}
/*
@@ -1405,7 +1423,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
u32 midx;
for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
+ mirror = nfs_pgio_get_mirror(desc, midx);
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
if (index != prev->wb_index + 1) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index a960ec3a569a..8d3ad5ef2925 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -178,6 +178,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
struct inode *inode = d_inode(dentry);
struct dentry *parent;
bool parent_watched = dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
+ bool parent_needed, parent_interested;
__u32 p_mask;
struct inode *p_inode = NULL;
struct name_snapshot name;
@@ -193,7 +194,8 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
return 0;
parent = NULL;
- if (!parent_watched && !fsnotify_event_needs_parent(inode, mnt, mask))
+ parent_needed = fsnotify_event_needs_parent(inode, mnt, mask);
+ if (!parent_watched && !parent_needed)
goto notify;
/* Does parent inode care about events on children? */
@@ -205,17 +207,17 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data,
/*
* Include parent/name in notification either if some notification
- * groups require parent info (!parent_watched case) or the parent is
- * interested in this event.
+ * groups require parent info or the parent is interested in this event.
*/
- if (!parent_watched || (mask & p_mask & ALL_FSNOTIFY_EVENTS)) {
+ parent_interested = mask & p_mask & ALL_FSNOTIFY_EVENTS;
+ if (parent_needed || parent_interested) {
/* When notifying parent, child should be passed as data */
WARN_ON_ONCE(inode != fsnotify_data_inode(data, data_type));
/* Notify both parent and child with child name info */
take_dentry_name_snapshot(&name, dentry);
file_name = &name.name;
- if (parent_watched)
+ if (parent_interested)
mask |= FS_EVENT_ON_CHILD;
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 72cd69bcaf4a..cc71ce3466dc 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,6 +16,13 @@ static const char *proc_self_get_link(struct dentry *dentry,
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
+ /*
+ * Not currently supported. Once we can inherit all of struct pid,
+ * we can allow this.
+ */
+ if (current->flags & PF_KTHREAD)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (!tgid)
return ERR_PTR(-ENOENT);
/* max length of unsigned int in decimal + NULL term */
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 217aa2705d5d..ee5a235b3056 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1599,11 +1599,15 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
src = *ppos;
svpfn = src / PM_ENTRY_BYTES;
- start_vaddr = svpfn << PAGE_SHIFT;
end_vaddr = mm->task_size;
/* watch out for wraparound */
- if (svpfn > mm->task_size >> PAGE_SHIFT)
+ start_vaddr = end_vaddr;
+ if (svpfn <= (ULONG_MAX >> PAGE_SHIFT))
+ start_vaddr = untagged_addr(svpfn << PAGE_SHIFT);
+
+ /* Ensure the address is inside the task */
+ if (start_vaddr > mm->task_size)
start_vaddr = end_vaddr;
/*
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3b20e21604e7..03a369ccd28c 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -168,12 +168,14 @@ EXPORT_SYMBOL(seq_read);
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
- size_t size = iov_iter_count(iter);
size_t copied = 0;
size_t n;
void *p;
int err = 0;
+ if (!iov_iter_count(iter))
+ return 0;
+
mutex_lock(&m->lock);
/*
@@ -206,36 +208,34 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!m->buf)
goto Enomem;
}
- /* if not empty - flush it first */
+ // something left in the buffer - copy it out first
if (m->count) {
- n = min(m->count, size);
- if (copy_to_iter(m->buf + m->from, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf + m->from, m->count, iter);
m->count -= n;
m->from += n;
- size -= n;
copied += n;
- if (!size)
+ if (m->count) // hadn't managed to copy everything
goto Done;
}
- /* we need at least one record in buffer */
+ // get a non-empty record in the buffer
m->from = 0;
p = m->op->start(m, &m->index);
while (1) {
err = PTR_ERR(p);
- if (!p || IS_ERR(p))
+ if (!p || IS_ERR(p)) // EOF or an error
break;
err = m->op->show(m, p);
- if (err < 0)
+ if (err < 0) // hard error
break;
- if (unlikely(err))
+ if (unlikely(err)) // ->show() says "skip it"
m->count = 0;
- if (unlikely(!m->count)) {
+ if (unlikely(!m->count)) { // empty record
p = m->op->next(m, p, &m->index);
continue;
}
- if (m->count < m->size)
+ if (!seq_has_overflowed(m)) // got it
goto Fill;
+ // need a bigger buffer
m->op->stop(m, p);
kvfree(m->buf);
m->count = 0;
@@ -244,11 +244,14 @@ ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto Enomem;
p = m->op->start(m, &m->index);
}
+ // EOF or an error
m->op->stop(m, p);
m->count = 0;
goto Done;
Fill:
- /* they want more? let's try to get some more */
+ // one non-empty record is in the buffer; if they want more,
+ // try to fit more in, but in any case we need to advance
+ // the iterator once for every record shown.
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
@@ -259,30 +262,27 @@ Fill:
m->op->next);
m->index++;
}
- if (!p || IS_ERR(p)) {
- err = PTR_ERR(p);
+ if (!p || IS_ERR(p)) // no next record for us
break;
- }
- if (m->count >= size)
+ if (m->count >= iov_iter_count(iter))
break;
err = m->op->show(m, p);
- if (seq_has_overflowed(m) || err) {
+ if (err > 0) { // ->show() says "skip it"
m->count = offs;
- if (likely(err <= 0))
- break;
+ } else if (err || seq_has_overflowed(m)) {
+ m->count = offs;
+ break;
}
}
m->op->stop(m, p);
- n = min(m->count, size);
- if (copy_to_iter(m->buf, n, iter) != n)
- goto Efault;
+ n = copy_to_iter(m->buf, m->count, iter);
copied += n;
m->count -= n;
m->from = n;
Done:
- if (!copied)
- copied = err;
- else {
+ if (unlikely(!copied)) {
+ copied = m->count ? -EFAULT : err;
+ } else {
iocb->ki_pos += copied;
m->read_pos += copied;
}
@@ -291,9 +291,6 @@ Done:
Enomem:
err = -ENOMEM;
goto Done;
-Efault:
- err = -EFAULT;
- goto Done;
}
EXPORT_SYMBOL(seq_read_iter);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 155521e51ac5..1f33a5598b93 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -270,6 +270,15 @@ done:
return d_splice_alias(inode, dentry);
}
+static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry,
+ struct fscrypt_name *nm)
+{
+ if (fscrypt_is_nokey_name(dentry))
+ return -ENOKEY;
+
+ return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm);
+}
+
static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool excl)
{
@@ -293,7 +302,7 @@ static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
if (err)
return err;
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
+ err = ubifs_prepare_create(dir, dentry, &nm);
if (err)
goto out_budg;
@@ -505,7 +514,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx)
return 0;
if (encrypted) {
- err = fscrypt_get_encryption_info(dir);
+ err = fscrypt_prepare_readdir(dir);
if (err)
return err;
@@ -953,7 +962,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
if (err)
return err;
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
+ err = ubifs_prepare_create(dir, dentry, &nm);
if (err)
goto out_budg;
@@ -1038,7 +1047,7 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
return err;
}
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
+ err = ubifs_prepare_create(dir, dentry, &nm);
if (err) {
kfree(dev);
goto out_budg;
@@ -1122,7 +1131,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
if (err)
return err;
- err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
+ err = ubifs_prepare_create(dir, dentry, &nm);
if (err)
goto out_budg;
@@ -1610,14 +1619,6 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
return 0;
}
-static int ubifs_dir_open(struct inode *dir, struct file *file)
-{
- if (IS_ENCRYPTED(dir))
- return fscrypt_get_encryption_info(dir) ? -EACCES : 0;
-
- return 0;
-}
-
const struct inode_operations ubifs_dir_inode_operations = {
.lookup = ubifs_lookup,
.create = ubifs_create,
@@ -1644,7 +1645,6 @@ const struct file_operations ubifs_dir_operations = {
.iterate_shared = ubifs_readdir,
.fsync = ubifs_fsync,
.unlocked_ioctl = ubifs_ioctl,
- .open = ubifs_dir_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = ubifs_compat_ioctl,
#endif
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index bb128db220ac..d6ef69ab1c67 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -515,7 +515,7 @@ xfs_attr_copy_value(
*========================================================================*/
/*
- * Query whether the requested number of additional bytes of extended
+ * Query whether the total requested number of attr fork bytes of extended
* attribute space will be able to fit inline.
*
* Returns zero if not, else the di_forkoff fork offset to be used in the
@@ -535,6 +535,12 @@ xfs_attr_shortform_bytesfit(
int maxforkoff;
int offset;
+ /*
+ * Check if the new size could fit at all first:
+ */
+ if (bytes > XFS_LITINO(mp))
+ return 0;
+
/* rounded down */
offset = (XFS_LITINO(mp) - bytes) >> 3;
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 577a66381327..beb81c84a937 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -243,8 +243,8 @@ xfs_rmapbt_key_diff(
else if (y > x)
return -1;
- x = be64_to_cpu(kp->rm_offset);
- y = xfs_rmap_irec_offset_pack(rec);
+ x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset));
+ y = rec->rm_offset;
if (x > y)
return 1;
else if (y > x)
@@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys(
else if (y > x)
return -1;
- x = be64_to_cpu(kp1->rm_offset);
- y = be64_to_cpu(kp2->rm_offset);
+ x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset));
+ y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset));
if (x > y)
return 1;
else if (y > x)
@@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder(
return 1;
else if (a > b)
return 0;
- a = be64_to_cpu(k1->rmap.rm_offset);
- b = be64_to_cpu(k2->rmap.rm_offset);
+ a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset));
+ b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
@@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder(
return 1;
else if (a > b)
return 0;
- a = be64_to_cpu(r1->rmap.rm_offset);
- b = be64_to_cpu(r2->rmap.rm_offset);
+ a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset));
+ b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset));
if (a <= b)
return 1;
return 0;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 412e2ec55e38..fed56d213a3f 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -218,13 +218,13 @@ xchk_bmap_xref_rmap(
* which doesn't track unwritten state.
*/
if (owner != XFS_RMAP_OWN_COW &&
- irec->br_state == XFS_EXT_UNWRITTEN &&
- !(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
+ !!(irec->br_state == XFS_EXT_UNWRITTEN) !=
+ !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
- if (info->whichfork == XFS_ATTR_FORK &&
- !(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
+ if (!!(info->whichfork == XFS_ATTR_FORK) !=
+ !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index f52a7b8256f9..debf392e0515 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -452,32 +452,41 @@ xchk_btree_check_minrecs(
int level,
struct xfs_btree_block *block)
{
- unsigned int numrecs;
- int ok_level;
-
- numrecs = be16_to_cpu(block->bb_numrecs);
+ struct xfs_btree_cur *cur = bs->cur;
+ unsigned int root_level = cur->bc_nlevels - 1;
+ unsigned int numrecs = be16_to_cpu(block->bb_numrecs);
/* More records than minrecs means the block is ok. */
- if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level))
+ if (numrecs >= cur->bc_ops->get_minrecs(cur, level))
return;
/*
- * Certain btree blocks /can/ have fewer than minrecs records. Any
- * level greater than or equal to the level of the highest dedicated
- * btree block are allowed to violate this constraint.
- *
- * For a btree rooted in a block, the btree root can have fewer than
- * minrecs records. If the btree is rooted in an inode and does not
- * store records in the root, the direct children of the root and the
- * root itself can have fewer than minrecs records.
+ * For btrees rooted in the inode, it's possible that the root block
+ * contents spilled into a regular ondisk block because there wasn't
+ * enough space in the inode root. The number of records in that
+ * child block might be less than the standard minrecs, but that's ok
+ * provided that there's only one direct child of the root.
*/
- ok_level = bs->cur->bc_nlevels - 1;
- if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
- ok_level--;
- if (level >= ok_level)
+ if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
+ level == cur->bc_nlevels - 2) {
+ struct xfs_btree_block *root_block;
+ struct xfs_buf *root_bp;
+ int root_maxrecs;
+
+ root_block = xfs_btree_get_block(cur, root_level, &root_bp);
+ root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level);
+ if (be16_to_cpu(root_block->bb_numrecs) != 1 ||
+ numrecs <= root_maxrecs)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
return;
+ }
- xchk_btree_set_corrupt(bs->sc, bs->cur, level);
+ /*
+ * Otherwise, only the root level is allowed to have fewer than minrecs
+ * records or keyptrs.
+ */
+ if (level < root_level)
+ xchk_btree_set_corrupt(bs->sc, cur, level);
}
/*
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 7c432997edad..b045e95c2ea7 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -558,14 +558,27 @@ xchk_directory_leaf1_bestfree(
/* Check all the bestfree entries. */
for (i = 0; i < bestcount; i++, bestp++) {
best = be16_to_cpu(*bestp);
- if (best == NULLDATAOFF)
- continue;
error = xfs_dir3_data_read(sc->tp, sc->ip,
- i * args->geo->fsbcount, 0, &dbp);
+ xfs_dir2_db_to_da(args->geo, i),
+ XFS_DABUF_MAP_HOLE_OK,
+ &dbp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
&error))
break;
- xchk_directory_check_freesp(sc, lblk, dbp, best);
+
+ if (!dbp) {
+ if (best != NULLDATAOFF) {
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
+ lblk);
+ break;
+ }
+ continue;
+ }
+
+ if (best == NULLDATAOFF)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
+ else
+ xchk_directory_check_freesp(sc, lblk, dbp, best);
xfs_trans_brelse(sc->tp, dbp);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
break;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3abb8b9d6f4c..7b9ff824e82d 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -706,6 +706,23 @@ relock:
return 0;
}
+/*
+ * Check that the imap we are going to return to the caller spans the entire
+ * range that the caller requested for the IO.
+ */
+static bool
+imap_spans_range(
+ struct xfs_bmbt_irec *imap,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb)
+{
+ if (imap->br_startoff > offset_fsb)
+ return false;
+ if (imap->br_startoff + imap->br_blockcount < end_fsb)
+ return false;
+ return true;
+}
+
static int
xfs_direct_write_iomap_begin(
struct inode *inode,
@@ -766,6 +783,18 @@ xfs_direct_write_iomap_begin(
if (imap_needs_alloc(inode, flags, &imap, nimaps))
goto allocate_blocks;
+ /*
+ * NOWAIT IO needs to span the entire requested IO with a single map so
+ * that we avoid partial IO failures due to the rest of the IO range not
+ * covered by this map triggering an EAGAIN condition when it is
+ * subsequently mapped and aborting the IO.
+ */
+ if ((flags & IOMAP_NOWAIT) &&
+ !imap_spans_range(&imap, offset_fsb, end_fsb)) {
+ error = -EAGAIN;
+ goto out_unlock;
+ }
+
xfs_iunlock(ip, lockmode);
trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 233dcc8784db..2a45138831e3 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
/* Where do we start the traversal? */
xfs_ino_t startino;
+ /* What was the last inode number we saw when iterating the inobt? */
+ xfs_ino_t lastino;
+
/* Array of inobt records we cache. */
struct xfs_inobt_rec_incore *recs;
@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
if (XFS_IS_CORRUPT(mp, *has_more != 1))
return -EFSCORRUPTED;
+ iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
+ irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
+
/*
* If the LE lookup yielded an inobt record before the cursor position,
* skip it and see if there's another one after it.
@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
struct xfs_mount *mp = iwag->mp;
struct xfs_trans *tp = iwag->tp;
struct xfs_inobt_rec_incore *irec;
- xfs_agino_t restart;
+ xfs_agino_t next_agino;
int error;
+ next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
+
ASSERT(iwag->nr_recs > 0);
/* Delete cursor but remember the last record we cached... */
xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
irec = &iwag->recs[iwag->nr_recs - 1];
- restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
+ ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
error = xfs_iwalk_ag_recs(iwag);
if (error)
@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
if (error)
return error;
- return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
+ return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
}
/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
@@ -396,6 +404,7 @@ xfs_iwalk_ag(
while (!error && has_more) {
struct xfs_inobt_rec_incore *irec;
+ xfs_ino_t rec_fsino;
cond_resched();
if (xfs_pwork_want_abort(&iwag->pwork))
@@ -407,6 +416,15 @@ xfs_iwalk_ag(
if (error || !has_more)
break;
+ /* Make sure that we always move forward. */
+ rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
+ if (iwag->lastino != NULLFSINO &&
+ XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+ iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
+
/* No allocated inodes in this chunk; skip it. */
if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
error = xfs_btree_increment(cur, 0, &has_more);
@@ -535,6 +553,7 @@ xfs_iwalk(
.trim_start = 1,
.skip_empty = 1,
.pwork = XFS_PWORK_SINGLE_THREADED,
+ .lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
iwag->data = data;
iwag->startino = startino;
iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
+ iwag->lastino = NULLFSINO;
xfs_pwork_queue(&pctl, &iwag->pwork);
startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
if (flags & XFS_INOBT_WALK_SAME_AG)
@@ -696,6 +716,7 @@ xfs_inobt_walk(
.startino = startino,
.sz_recs = xfs_inobt_walk_prefetch(inobt_records),
.pwork = XFS_PWORK_SINGLE_THREADED,
+ .lastino = NULLFSINO,
};
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 150ee5cb8645..7110507a2b6b 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -194,20 +194,25 @@ xfs_initialize_perag(
}
pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
- if (!pag)
+ if (!pag) {
+ error = -ENOMEM;
goto out_unwind_new_pags;
+ }
pag->pag_agno = index;
pag->pag_mount = mp;
spin_lock_init(&pag->pag_ici_lock);
INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- if (xfs_buf_hash_init(pag))
+
+ error = xfs_buf_hash_init(pag);
+ if (error)
goto out_free_pag;
init_waitqueue_head(&pag->pagb_wait);
spin_lock_init(&pag->pagb_lock);
pag->pagb_count = 0;
pag->pagb_tree = RB_ROOT;
- if (radix_tree_preload(GFP_NOFS))
+ error = radix_tree_preload(GFP_NOFS);
+ if (error)
goto out_hash_destroy;
spin_lock(&mp->m_perag_lock);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index ff5930be096c..bec47f2d074b 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -691,21 +691,23 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
bio->bi_opf |= REQ_FUA;
ret = bio_iov_iter_get_pages(bio, from);
- if (unlikely(ret)) {
- bio_io_error(bio);
- return ret;
- }
+ if (unlikely(ret))
+ goto out_release;
+
size = bio->bi_iter.bi_size;
- task_io_account_write(ret);
+ task_io_account_write(size);
if (iocb->ki_flags & IOCB_HIPRI)
bio_set_polled(bio, iocb);
ret = submit_bio_wait(bio);
+ zonefs_file_write_dio_end_io(iocb, size, ret, 0);
+
+out_release:
+ bio_release_pages(bio, false);
bio_put(bio);
- zonefs_file_write_dio_end_io(iocb, size, ret, 0);
if (ret >= 0) {
iocb->ki_pos += size;
return size;