summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/ext4/Makefile1
-rw-r--r--fs/ext4/acl.c26
-rw-r--r--fs/ext4/acl.h1
-rw-r--r--fs/ext4/balloc.c16
-rw-r--r--fs/ext4/bitmap.c1
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h84
-rw-r--r--fs/ext4/ext4_jbd2.c3
-rw-r--r--fs/ext4/ext4_jbd2.h3
-rw-r--r--fs/ext4/extents.c17
-rw-r--r--fs/ext4/extents_status.c1
-rw-r--r--fs/ext4/extents_status.h1
-rw-r--r--fs/ext4/file.c373
-rw-r--r--fs/ext4/fsync.c3
-rw-r--r--fs/ext4/hash.c4
-rw-r--r--fs/ext4/ialloc.c100
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inline.c43
-rw-r--r--fs/ext4/inode.c438
-rw-r--r--fs/ext4/ioctl.c144
-rw-r--r--fs/ext4/mballoc.c40
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/mmp.c5
-rw-r--r--fs/ext4/namei.c127
-rw-r--r--fs/ext4/page-io.c5
-rw-r--r--fs/ext4/readpage.c3
-rw-r--r--fs/ext4/resize.c108
-rw-r--r--fs/ext4/super.c169
-rw-r--r--fs/ext4/symlink.c1
-rw-r--r--fs/ext4/sysfs.c1
-rw-r--r--fs/ext4/truncate.h1
-rw-r--r--fs/ext4/xattr.c231
-rw-r--r--fs/ext4/xattr.h1
-rw-r--r--fs/ext4/xattr_security.c1
-rw-r--r--fs/ext4/xattr_trusted.c1
-rw-r--r--fs/ext4/xattr_user.c1
38 files changed, 950 insertions, 1011 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e38039fd96ff..73b850f5659c 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -37,6 +37,7 @@ config EXT4_FS
select CRC16
select CRYPTO
select CRYPTO_CRC32C
+ select FS_IOMAP
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index d9beca1653c5..8fdfcd3c3e04 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux ext4-filesystem routines.
#
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 09441ae07a5b..fb50f9aa6ead 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/acl.c
*
@@ -193,13 +194,6 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
- if (error)
- return error;
- inode->i_ctime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- }
break;
case ACL_TYPE_DEFAULT:
@@ -221,8 +215,9 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
value, size, xattr_flags);
kfree(value);
- if (!error)
+ if (!error) {
set_cached_acl(inode, type, acl);
+ }
return error;
}
@@ -233,6 +228,8 @@ ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
handle_t *handle;
int error, credits, retries = 0;
size_t acl_size = acl ? ext4_acl_size(acl->a_count) : 0;
+ umode_t mode = inode->i_mode;
+ int update_mode = 0;
error = dquot_initialize(inode);
if (error)
@@ -247,7 +244,20 @@ retry:
if (IS_ERR(handle))
return PTR_ERR(handle);
+ if ((type == ACL_TYPE_ACCESS) && acl) {
+ error = posix_acl_update_mode(inode, &mode, &acl);
+ if (error)
+ goto out_stop;
+ update_mode = 1;
+ }
+
error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
+ if (!error && update_mode) {
+ inode->i_mode = mode;
+ inode->i_ctime = current_time(inode);
+ ext4_mark_inode_dirty(handle, inode);
+ }
+out_stop:
ext4_journal_stop(handle);
if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index da2c79577d72..a48fc5ae2701 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
File: fs/ext4/acl.h
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e04ec868e37e..a943e568292e 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/balloc.c
*
@@ -600,22 +601,21 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
* ext4_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
* for the current or committing transaction to complete, and then
- * return TRUE.
- *
- * if the total number of retries exceed three times, return FALSE.
+ * return TRUE. We will only retry once.
*/
int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
- (*retries)++ > 3 ||
+ (*retries)++ > 1 ||
!EXT4_SB(sb)->s_journal)
return 0;
- jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
-
smp_mb();
- if (EXT4_SB(sb)->s_mb_free_pending)
- jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+ if (EXT4_SB(sb)->s_mb_free_pending == 0)
+ return 0;
+
+ jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+ jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
return 1;
}
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 4a606afb171f..f63e028c638c 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/bitmap.c
*
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index fdb19543af1e..bee888e0e2db 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/block_validity.c
*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index e8b365000d73..d5babc9f222b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/dir.c
*
@@ -411,7 +412,7 @@ static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp,
{
struct dir_private_info *p;
- p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
p->curr_hash = pos2maj_hash(filp, pos);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..4e091eae38b1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* ext4.h
*
@@ -33,17 +34,15 @@
#include <linux/percpu_counter.h>
#include <linux/ratelimit.h>
#include <crypto/hash.h>
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
-#include <linux/fscrypt_supp.h>
-#else
-#include <linux/fscrypt_notsupp.h>
-#endif
#include <linux/falloc.h>
#include <linux/percpu-rwsem.h>
#ifdef __KERNEL__
#include <linux/compat.h>
#endif
+#define __FS_HAS_ENCRYPTION IS_ENABLED(CONFIG_EXT4_FS_ENCRYPTION)
+#include <linux/fscrypt.h>
+
/*
* The fourth extended filesystem constants/structures
*/
@@ -545,8 +544,8 @@ struct ext4_new_group_data {
__u64 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
- __u16 unused;
- __u32 free_blocks_count;
+ __u16 mdata_blocks;
+ __u32 free_clusters_count;
};
/* Indexes used to index group tables in ext4_new_group_data */
@@ -644,43 +643,6 @@ enum {
#define EXT4_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
#define EXT4_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
-#ifndef FS_IOC_FSGETXATTR
-/* Until the uapi changes get merged for project quota... */
-
-#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
-#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
-
-/*
- * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
- */
-struct fsxattr {
- __u32 fsx_xflags; /* xflags field value (get/set) */
- __u32 fsx_extsize; /* extsize field value (get/set)*/
- __u32 fsx_nextents; /* nextents field value (get) */
- __u32 fsx_projid; /* project identifier (get/set) */
- unsigned char fsx_pad[12];
-};
-
-/*
- * Flags for the fsx_xflags field
- */
-#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
-#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
-#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
-#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
-#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
-#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
-#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
-#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
-#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
-#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
-#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
-#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
-#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
-#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
-#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
-#endif /* !defined(FS_IOC_FSGETXATTR) */
-
#define EXT4_IOC_FSGETXATTR FS_IOC_FSGETXATTR
#define EXT4_IOC_FSSETXATTR FS_IOC_FSSETXATTR
@@ -838,13 +800,11 @@ static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
{
if (unlikely(sizeof(time->tv_sec) > 4 &&
(extra & cpu_to_le32(EXT4_EPOCH_MASK)))) {
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0)
+
+#if 1
/* Handle legacy encoding of pre-1970 dates with epoch
- * bits 1,1. We assume that by kernel version 4.20,
- * everyone will have run fsck over the affected
- * filesystems to correct the problem. (This
- * backwards compatibility may be removed before this
- * time, at the discretion of the ext4 developers.)
+ * bits 1,1. (This backwards compatibility may be removed
+ * at the discretion of the ext4 developers.)
*/
u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK;
if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0)
@@ -961,7 +921,7 @@ struct ext4_inode_info {
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
- * it is ued for making block allocation decisions - we try to
+ * it is used for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
@@ -1049,10 +1009,8 @@ struct ext4_inode_info {
ext4_group_t i_last_alloc_group;
/* allocation reservation info for delalloc */
- /* In case of bigalloc, these refer to clusters rather than blocks */
+ /* In case of bigalloc, this refer to clusters rather than blocks */
unsigned int i_reserved_data_blocks;
- unsigned int i_reserved_meta_blocks;
- unsigned int i_allocated_meta_blocks;
ext4_lblk_t i_da_metadata_calc_last_lblock;
int i_da_metadata_calc_len;
@@ -1396,8 +1354,6 @@ struct ext4_sb_info {
int s_first_ino;
unsigned int s_inode_readahead_blks;
unsigned int s_inode_goal;
- spinlock_t s_next_gen_lock;
- u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */
@@ -1530,6 +1486,7 @@ struct ext4_sb_info {
/* Barrier between changing inodes' journal flags and writepages ops. */
struct percpu_rw_semaphore s_journal_flag_rwsem;
+ struct dax_device *s_daxdev;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1569,6 +1526,7 @@ enum {
nolocking */
EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
EXT4_STATE_EXT_PRECACHED, /* extents have been precached */
+ EXT4_STATE_LUSTRE_EA_INODE, /* Lustre-style ea_inode */
};
#define EXT4_INODE_BIT_FNS(name, field, offset) \
@@ -2022,7 +1980,8 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
#define is_dx(dir) (ext4_has_feature_dir_index((dir)->i_sb) && \
ext4_test_inode_flag((dir), EXT4_INODE_INDEX))
-#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
+#define EXT4_DIR_LINK_MAX(dir) unlikely((dir)->i_nlink >= EXT4_LINK_MAX && \
+ !(ext4_has_feature_dir_nlink((dir)->i_sb) && is_dx(dir)))
#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
/* Legal values for the dx_root hash_version field: */
@@ -2462,6 +2421,8 @@ extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+ bool wait, struct buffer_head **bhs);
int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2514,9 +2475,6 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
-extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
- unsigned int map_len,
- struct extent_status *result);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3047,6 +3005,10 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
extern int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len);
+
+struct iomap;
+extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
+
extern int ext4_try_to_evict_inline_data(handle_t *handle,
struct inode *inode,
int needed);
@@ -3074,7 +3036,7 @@ extern int ext4_handle_dirty_dirent_node(handle_t *handle,
struct inode *inode,
struct buffer_head *bh);
#define S_SHIFT 12
-static const unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
+static const unsigned char ext4_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = EXT4_FT_DIR,
[S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV,
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index dd106b1d5d89..2d593201cf7a 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Interface between ext4 and JBD
*/
@@ -47,7 +48,7 @@ static int ext4_journal_check_start(struct super_block *sb)
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return -EIO;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return -EROFS;
WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index dabad1bc8617..48143e32411c 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -227,6 +227,9 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
+int ext4_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc *iloc);
/*
* Wrapper functions with which ext4 calls into JBD.
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e0a8425ff74d..07bca11749d4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4652,7 +4652,7 @@ retry:
static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
ext4_lblk_t len, loff_t new_size,
- int flags, int mode)
+ int flags)
{
struct inode *inode = file_inode(file);
handle_t *handle;
@@ -4794,7 +4794,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
@@ -4815,7 +4816,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits) >> blkbits,
(round_up((offset + len), 1 << blkbits) -
round_down(offset, 1 << blkbits)) >> blkbits,
- new_size, flags, mode);
+ new_size, flags);
if (ret)
goto out_dio;
@@ -4841,7 +4842,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
inode->i_mtime = inode->i_ctime = current_time(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
- flags, mode);
+ flags);
up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_dio;
@@ -4965,7 +4966,8 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
+ (offset + len > i_size_read(inode) ||
+ offset + len > EXT4_I(inode)->i_disksize)) {
new_size = offset + len;
ret = inode_newsize_ok(inode, new_size);
if (ret)
@@ -4976,8 +4978,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
- ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
- flags, mode);
+ ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
@@ -5837,7 +5838,7 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
if (e1_blk > lblk1)
next1 = e1_blk;
if (e2_blk > lblk2)
- next2 = e1_blk;
+ next2 = e2_blk;
/* Do we have something to swap */
if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
goto finish;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index e7f12a204cbc..763ef185dd17 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* fs/ext4/extents_status.c
*
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f7aa24f4642d..ca90fc96f47e 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/ext4/extents_status.h
*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 58294c9a7e1d..a0ae27b1bc66 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/file.c
*
@@ -20,12 +21,14 @@
#include <linux/time.h>
#include <linux/fs.h>
+#include <linux/iomap.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
+#include <linux/mman.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -223,6 +226,8 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
#endif
+ if (!o_direct && (iocb->ki_flags & IOCB_NOWAIT))
+ return -EOPNOTSUPP;
if (!inode_trylock(inode)) {
if (iocb->ki_flags & IOCB_NOWAIT)
@@ -279,7 +284,21 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
handle_t *handle = NULL;
struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
- bool write = vmf->flags & FAULT_FLAG_WRITE;
+
+ /*
+ * We have to distinguish real writes from writes which will result in a
+ * COW page; COW writes should *not* poke the journal (the file will not
+ * be changed). Doing so would cause unintended failures when mounted
+ * read-only.
+ *
+ * We check for VM_SHARED rather than vmf->cow_page since the latter is
+ * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
+ * other sizes, dax_iomap_fault will handle splitting / fallback so that
+ * we eventually come back with a COW page.
+ */
+ bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
+ (vmf->vma->vm_flags & VM_SHARED);
+ pfn_t pfn;
if (write) {
sb_start_pagefault(sb);
@@ -287,16 +306,20 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
+ if (IS_ERR(handle)) {
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+ return VM_FAULT_SIGBUS;
+ }
} else {
down_read(&EXT4_I(inode)->i_mmap_sem);
}
- if (!IS_ERR(handle))
- result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
- else
- result = VM_FAULT_SIGBUS;
+ result = dax_iomap_fault(vmf, pe_size, &pfn, &ext4_iomap_ops);
if (write) {
- if (!IS_ERR(handle))
- ext4_journal_stop(handle);
+ ext4_journal_stop(handle);
+ /* Handling synchronous page fault? */
+ if (result & VM_FAULT_NEEDDSYNC)
+ result = dax_finish_sync_fault(vmf, pe_size, pfn);
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
} else {
@@ -311,41 +334,11 @@ static int ext4_dax_fault(struct vm_fault *vmf)
return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
}
-/*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
- * handler we check for races agaist truncate. Note that since we cycle through
- * i_mmap_sem, we are sure that also any hole punching that began before we
- * were called is finished by now and so if it included part of the file we
- * are working on, our pte will get unmapped and the check for pte_same() in
- * wp_pfn_shared() fails. Thus fault gets retried and things work out as
- * desired.
- */
-static int ext4_dax_pfn_mkwrite(struct vm_fault *vmf)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
- struct super_block *sb = inode->i_sb;
- loff_t size;
- int ret;
-
- sb_start_pagefault(sb);
- file_update_time(vmf->vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
- size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
- if (vmf->pgoff >= size)
- ret = VM_FAULT_SIGBUS;
- else
- ret = dax_pfn_mkwrite(vmf);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- sb_end_pagefault(sb);
-
- return ret;
-}
-
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.huge_fault = ext4_dax_huge_fault,
.page_mkwrite = ext4_dax_fault,
- .pfn_mkwrite = ext4_dax_pfn_mkwrite,
+ .pfn_mkwrite = ext4_dax_fault,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
@@ -364,6 +357,13 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO;
+ /*
+ * We don't support synchronous mappings for non-DAX files. At least
+ * until someone comes with a sensible use case.
+ */
+ if (!IS_DAX(file_inode(file)) && (vma->vm_flags & VM_SYNC))
+ return -EOPNOTSUPP;
+
file_accessed(file);
if (IS_DAX(file_inode(file))) {
vma->vm_ops = &ext4_dax_vm_ops;
@@ -379,7 +379,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct vfsmount *mnt = filp->f_path.mnt;
- struct dentry *dir;
struct path path;
char buf[64], *cp;
int ret;
@@ -388,7 +387,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return -EIO;
if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
- !(sb->s_flags & MS_RDONLY))) {
+ !sb_rdonly(sb))) {
sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
/*
* Sample where the filesystem has been mounted and
@@ -419,25 +418,11 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
ext4_journal_stop(handle);
}
}
- if (ext4_encrypted_inode(inode)) {
- ret = fscrypt_get_encryption_info(inode);
- if (ret)
- return -EACCES;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
- dir = dget_parent(file_dentry(filp));
- if (ext4_encrypted_inode(d_inode(dir)) &&
- !fscrypt_has_permitted_context(d_inode(dir), inode)) {
- ext4_warning(inode->i_sb,
- "Inconsistent encryption contexts: %lu/%lu",
- (unsigned long) d_inode(dir)->i_ino,
- (unsigned long) inode->i_ino);
- dput(dir);
- return -EPERM;
- }
- dput(dir);
+ ret = fscrypt_file_open(inode, filp);
+ if (ret)
+ return ret;
+
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
@@ -448,260 +433,11 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
return ret;
}
- /* Set the flags to support nowait AIO */
- filp->f_mode |= FMODE_AIO_NOWAIT;
-
+ filp->f_mode |= FMODE_NOWAIT;
return dquot_file_open(inode, filp);
}
/*
- * Here we use ext4_map_blocks() to get a block mapping for a extent-based
- * file rather than ext4_ext_walk_space() because we can introduce
- * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped file at the same
- * function. When extent status tree has been fully implemented, it will
- * track all extent status for a file and we can directly use it to
- * retrieve the offset for SEEK_DATA/SEEK_HOLE.
- */
-
-/*
- * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we would need to
- * lookup page cache to check whether or not there has some data between
- * [startoff, endoff] because, if this range contains an unwritten extent,
- * we determine this extent as a data or a hole according to whether the
- * page cache has data or not.
- */
-static int ext4_find_unwritten_pgoff(struct inode *inode,
- int whence,
- ext4_lblk_t end_blk,
- loff_t *offset)
-{
- struct pagevec pvec;
- unsigned int blkbits;
- pgoff_t index;
- pgoff_t end;
- loff_t endoff;
- loff_t startoff;
- loff_t lastoff;
- int found = 0;
-
- blkbits = inode->i_sb->s_blocksize_bits;
- startoff = *offset;
- lastoff = startoff;
- endoff = (loff_t)end_blk << blkbits;
-
- index = startoff >> PAGE_SHIFT;
- end = (endoff - 1) >> PAGE_SHIFT;
-
- pagevec_init(&pvec, 0);
- do {
- int i, num;
- unsigned long nr_pages;
-
- num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
- (pgoff_t)num);
- if (nr_pages == 0)
- break;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
- struct buffer_head *bh, *head;
-
- /*
- * If current offset is smaller than the page offset,
- * there is a hole at this offset.
- */
- if (whence == SEEK_HOLE && lastoff < endoff &&
- lastoff < page_offset(pvec.pages[i])) {
- found = 1;
- *offset = lastoff;
- goto out;
- }
-
- if (page->index > end)
- goto out;
-
- lock_page(page);
-
- if (unlikely(page->mapping != inode->i_mapping)) {
- unlock_page(page);
- continue;
- }
-
- if (!page_has_buffers(page)) {
- unlock_page(page);
- continue;
- }
-
- if (page_has_buffers(page)) {
- lastoff = page_offset(page);
- bh = head = page_buffers(page);
- do {
- if (buffer_uptodate(bh) ||
- buffer_unwritten(bh)) {
- if (whence == SEEK_DATA)
- found = 1;
- } else {
- if (whence == SEEK_HOLE)
- found = 1;
- }
- if (found) {
- *offset = max_t(loff_t,
- startoff, lastoff);
- unlock_page(page);
- goto out;
- }
- lastoff += bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
- }
-
- lastoff = page_offset(page) + PAGE_SIZE;
- unlock_page(page);
- }
-
- /* The no. of pages is less than our desired, we are done. */
- if (nr_pages < num)
- break;
-
- index = pvec.pages[i - 1]->index + 1;
- pagevec_release(&pvec);
- } while (index <= end);
-
- if (whence == SEEK_HOLE && lastoff < endoff) {
- found = 1;
- *offset = lastoff;
- }
-out:
- pagevec_release(&pvec);
- return found;
-}
-
-/*
- * ext4_seek_data() retrieves the offset for SEEK_DATA.
- */
-static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
-{
- struct inode *inode = file->f_mapping->host;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t dataoff, isize;
- int blkbits;
- int ret;
-
- inode_lock(inode);
-
- isize = i_size_read(inode);
- if (offset >= isize) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- dataoff = offset;
-
- do {
- ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
- if (ret <= 0) {
- /* No extent found -> no data */
- if (ret == 0)
- ret = -ENXIO;
- inode_unlock(inode);
- return ret;
- }
-
- last = es.es_lblk;
- if (last != start)
- dataoff = (loff_t)last << blkbits;
- if (!ext4_es_is_unwritten(&es))
- break;
-
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- es.es_lblk + es.es_len, &dataoff))
- break;
- last += es.es_len;
- dataoff = (loff_t)last << blkbits;
- cond_resched();
- } while (last <= end);
-
- inode_unlock(inode);
-
- if (dataoff > isize)
- return -ENXIO;
-
- return vfs_setpos(file, dataoff, maxsize);
-}
-
-/*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
- */
-static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
-{
- struct inode *inode = file->f_mapping->host;
- struct extent_status es;
- ext4_lblk_t start, last, end;
- loff_t holeoff, isize;
- int blkbits;
- int ret;
-
- inode_lock(inode);
-
- isize = i_size_read(inode);
- if (offset >= isize) {
- inode_unlock(inode);
- return -ENXIO;
- }
-
- blkbits = inode->i_sb->s_blocksize_bits;
- start = offset >> blkbits;
- last = start;
- end = isize >> blkbits;
- holeoff = offset;
-
- do {
- ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
- if (ret < 0) {
- inode_unlock(inode);
- return ret;
- }
- /* Found a hole? */
- if (ret == 0 || es.es_lblk > last) {
- if (last != start)
- holeoff = (loff_t)last << blkbits;
- break;
- }
- /*
- * If there is a unwritten extent at this offset,
- * it will be as a data or a hole according to page
- * cache that has data or not.
- */
- if (ext4_es_is_unwritten(&es) &&
- ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- last + es.es_len, &holeoff))
- break;
-
- last += es.es_len;
- holeoff = (loff_t)last << blkbits;
- cond_resched();
- } while (last <= end);
-
- inode_unlock(inode);
-
- if (holeoff > isize)
- holeoff = isize;
-
- return vfs_setpos(file, holeoff, maxsize);
-}
-
-/*
* ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
* by calling generic_file_llseek_size() with the appropriate maxbytes
* value for each.
@@ -717,18 +453,24 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
maxbytes = inode->i_sb->s_maxbytes;
switch (whence) {
- case SEEK_SET:
- case SEEK_CUR:
- case SEEK_END:
+ default:
return generic_file_llseek_size(file, offset, whence,
maxbytes, i_size_read(inode));
- case SEEK_DATA:
- return ext4_seek_data(file, offset, maxbytes);
case SEEK_HOLE:
- return ext4_seek_hole(file, offset, maxbytes);
+ inode_lock_shared(inode);
+ offset = iomap_seek_hole(inode, offset, &ext4_iomap_ops);
+ inode_unlock_shared(inode);
+ break;
+ case SEEK_DATA:
+ inode_lock_shared(inode);
+ offset = iomap_seek_data(inode, offset, &ext4_iomap_ops);
+ inode_unlock_shared(inode);
+ break;
}
- return -EINVAL;
+ if (offset < 0)
+ return offset;
+ return vfs_setpos(file, offset, maxbytes);
}
const struct file_operations ext4_file_operations = {
@@ -740,6 +482,7 @@ const struct file_operations ext4_file_operations = {
.compat_ioctl = ext4_compat_ioctl,
#endif
.mmap = ext4_file_mmap,
+ .mmap_supported_flags = MAP_SYNC,
.open = ext4_file_open,
.release = ext4_release_file,
.fsync = ext4_sync_file,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index aae2c3971cef..26a7fe5c4fd3 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/fsync.c
*
@@ -107,7 +108,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trace_ext4_sync_file_enter(file, datasync);
- if (inode->i_sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(inode->i_sb)) {
/* Make sure that we read updated s_mount_flags value */
smp_rmb();
if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED)
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 38b8a96eb97c..00c6dd29e621 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -148,8 +148,6 @@ static void str2hashbuf_signed(const char *msg, int len, __u32 *buf, int num)
if (len > num*4)
len = num * 4;
for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
val = ((int) scp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
@@ -176,8 +174,6 @@ static void str2hashbuf_unsigned(const char *msg, int len, __u32 *buf, int num)
if (len > num*4)
len = num * 4;
for (i = 0; i < len; i++) {
- if ((i % 4) == 0)
- val = pad;
val = ((int) ucp[i]) + (val << 8);
if ((i % 4) == 3) {
*buf++ = val;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 507bfb3344d4..b4267d72f249 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/ialloc.c
*
@@ -692,24 +693,25 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
* somewhat arbitrary...)
*/
#define RECENTCY_MIN 5
-#define RECENTCY_DIRTY 30
+#define RECENTCY_DIRTY 300
static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
{
struct ext4_group_desc *gdp;
struct ext4_inode *raw_inode;
struct buffer_head *bh;
- unsigned long dtime, now;
- int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
- int offset, ret = 0, recentcy = RECENTCY_MIN;
+ int inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
+ int offset, ret = 0;
+ int recentcy = RECENTCY_MIN;
+ u32 dtime, now;
gdp = ext4_get_group_desc(sb, group, NULL);
if (unlikely(!gdp))
return 0;
- bh = sb_getblk(sb, ext4_inode_table(sb, gdp) +
+ bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
- if (unlikely(!bh) || !buffer_uptodate(bh))
+ if (!bh || !buffer_uptodate(bh))
/*
* If the block is not in the buffer cache, then it
* must have been written out.
@@ -718,18 +720,45 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
offset = (ino % inodes_per_block) * EXT4_INODE_SIZE(sb);
raw_inode = (struct ext4_inode *) (bh->b_data + offset);
+
+ /* i_dtime is only 32 bits on disk, but we only care about relative
+ * times in the range of a few minutes (i.e. long enough to sync a
+ * recently-deleted inode to disk), so using the low 32 bits of the
+ * clock (a 68 year range) is enough, see time_before32() */
dtime = le32_to_cpu(raw_inode->i_dtime);
- now = get_seconds();
+ now = ktime_get_real_seconds();
if (buffer_dirty(bh))
recentcy += RECENTCY_DIRTY;
- if (dtime && (dtime < now) && (now < dtime + recentcy))
+ if (dtime && time_before32(dtime, now) &&
+ time_before32(now, dtime + recentcy))
ret = 1;
out:
brelse(bh);
return ret;
}
+static int find_inode_bit(struct super_block *sb, ext4_group_t group,
+ struct buffer_head *bitmap, unsigned long *ino)
+{
+next:
+ *ino = ext4_find_next_zero_bit((unsigned long *)
+ bitmap->b_data,
+ EXT4_INODES_PER_GROUP(sb), *ino);
+ if (*ino >= EXT4_INODES_PER_GROUP(sb))
+ return 0;
+
+ if ((EXT4_SB(sb)->s_journal == NULL) &&
+ recently_deleted(sb, group, *ino)) {
+ *ino = *ino + 1;
+ if (*ino < EXT4_INODES_PER_GROUP(sb))
+ goto next;
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
@@ -892,19 +921,13 @@ got_group:
/*
* Check free inodes count before loading bitmap.
*/
- if (ext4_free_inodes_count(sb, gdp) == 0) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (ext4_free_inodes_count(sb, gdp) == 0)
+ goto next_group;
grp = ext4_get_group_info(sb, group);
/* Skip groups with already-known suspicious inode tables */
- if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp))
+ goto next_group;
brelse(inode_bitmap_bh);
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
@@ -912,27 +935,20 @@ got_group:
if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) ||
IS_ERR(inode_bitmap_bh)) {
inode_bitmap_bh = NULL;
- if (++group == ngroups)
- group = 0;
- continue;
+ goto next_group;
}
repeat_in_this_group:
- ino = ext4_find_next_zero_bit((unsigned long *)
- inode_bitmap_bh->b_data,
- EXT4_INODES_PER_GROUP(sb), ino);
- if (ino >= EXT4_INODES_PER_GROUP(sb))
+ ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
+ if (!ret2)
goto next_group;
- if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
+
+ if (group == 0 && (ino + 1) < EXT4_FIRST_INO(sb)) {
ext4_error(sb, "reserved inode found cleared - "
"inode=%lu", ino + 1);
- continue;
- }
- if ((EXT4_SB(sb)->s_journal == NULL) &&
- recently_deleted(sb, group, ino)) {
- ino++;
- goto next_inode;
+ goto next_group;
}
+
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
@@ -952,11 +968,23 @@ repeat_in_this_group:
}
ext4_lock_group(sb, group);
ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ if (ret2) {
+ /* Someone already took the bit. Repeat the search
+ * with lock held.
+ */
+ ret2 = find_inode_bit(sb, group, inode_bitmap_bh, &ino);
+ if (ret2) {
+ ext4_set_bit(ino, inode_bitmap_bh->b_data);
+ ret2 = 0;
+ } else {
+ ret2 = 1; /* we didn't grab the inode */
+ }
+ }
ext4_unlock_group(sb, group);
ino++; /* the inode bitmap is zero-based */
if (!ret2)
goto got; /* we grabbed the inode! */
-next_inode:
+
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
next_group:
@@ -1111,9 +1139,7 @@ got:
inode->i_ino);
goto out;
}
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode->i_generation = prandom_u32();
/* Precompute checksum seed for inode metadata */
if (ext4_has_metadata_csum(sb)) {
@@ -1355,7 +1381,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
int num, ret = 0, used_blks = 0;
/* This should not happen, but just to be sure check this */
- if (sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(sb)) {
ret = 1;
goto out;
}
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 7ffa290cbb8e..c32802c956d5 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/indirect.c
*
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 28c5c3abddb3..1367553c43bb 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -12,6 +12,7 @@
* GNU General Public License for more details.
*/
+#include <linux/iomap.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
@@ -302,11 +303,6 @@ static int ext4_create_inline_data(handle_t *handle,
EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
- /*
- * Propagate changes to inode->i_flags as well - e.g. S_DAX may
- * get cleared
- */
- ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -451,11 +447,6 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
}
}
ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
- /*
- * Propagate changes to inode->i_flags as well - e.g. S_DAX may
- * get set.
- */
- ext4_set_inode_flags(inode);
get_bh(is.iloc.bh);
error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
@@ -1827,6 +1818,38 @@ int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
return ret;
}
+int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap)
+{
+ __u64 addr;
+ int error = -EAGAIN;
+ struct ext4_iloc iloc;
+
+ down_read(&EXT4_I(inode)->xattr_sem);
+ if (!ext4_has_inline_data(inode))
+ goto out;
+
+ error = ext4_get_inode_loc(inode, &iloc);
+ if (error)
+ goto out;
+
+ addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
+ addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
+ addr += offsetof(struct ext4_inode, i_block);
+
+ brelse(iloc.bh);
+
+ iomap->addr = addr;
+ iomap->offset = 0;
+ iomap->length = min_t(loff_t, ext4_get_inline_size(inode),
+ i_size_read(inode));
+ iomap->type = 0;
+ iomap->flags = IOMAP_F_DATA_INLINE;
+
+out:
+ up_read(&EXT4_I(inode)->xattr_sem);
+ return error;
+}
+
int ext4_inline_data_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo,
int *has_inline, __u64 start, __u64 len)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c600f02673f..0992d76f7ab1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/inode.c
*
@@ -892,7 +893,7 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode,
/*
* Get block function for non-AIO DIO writes when we create unwritten extent if
* blocks are not allocated yet. The extent will be converted to written
- * after IO is complete from ext4_ext_direct_IO() function.
+ * after IO is complete by ext4_direct_IO_write().
*/
static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
sector_t iblock, struct buffer_head *bh_result, int create)
@@ -907,7 +908,7 @@ static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
/*
* Mark inode as having pending DIO writes to unwritten extents.
- * ext4_ext_direct_IO() checks this flag and converts extents to
+ * ext4_direct_IO_write() checks this flag and converts extents to
* written.
*/
if (!ret && buffer_unwritten(bh_result))
@@ -1015,6 +1016,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return ERR_PTR(-EIO);
}
+/* Read a contiguous batch of blocks. */
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+ bool wait, struct buffer_head **bhs)
+{
+ int i, err;
+
+ for (i = 0; i < bh_count; i++) {
+ bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
+ if (IS_ERR(bhs[i])) {
+ err = PTR_ERR(bhs[i]);
+ bh_count = i;
+ goto out_brelse;
+ }
+ }
+
+ for (i = 0; i < bh_count; i++)
+ /* Note that NULL bhs[i] is valid because of holes. */
+ if (bhs[i] && !buffer_uptodate(bhs[i]))
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
+ &bhs[i]);
+
+ if (!wait)
+ return 0;
+
+ for (i = 0; i < bh_count; i++)
+ if (bhs[i])
+ wait_on_buffer(bhs[i]);
+
+ for (i = 0; i < bh_count; i++) {
+ if (bhs[i] && !buffer_uptodate(bhs[i])) {
+ err = -EIO;
+ goto out_brelse;
+ }
+ }
+ return 0;
+
+out_brelse:
+ for (i = 0; i < bh_count; i++) {
+ brelse(bhs[i]);
+ bhs[i] = NULL;
+ }
+ return err;
+}
+
int ext4_walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
@@ -1674,15 +1719,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
ext4_es_remove_extent(inode, start, last - start + 1);
}
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
+
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
@@ -1693,7 +1737,6 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
}
unlock_page(page);
}
- index = pvec.pages[nr_pages - 1]->index + 1;
pagevec_release(&pvec);
}
}
@@ -2302,19 +2345,15 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
lblk = start << bpp_bits;
pblock = mpd->map.m_pblk;
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
while (start <= end) {
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
- PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
+ &start, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
- /* Up to 'end' pages must be contiguous */
- BUG_ON(page->index != start);
bh = head = page_buffers(page);
do {
if (lblk < mpd->map.m_lblk)
@@ -2359,7 +2398,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
pagevec_release(&pvec);
return err;
}
- start++;
}
pagevec_release(&pvec);
}
@@ -2578,12 +2616,12 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
else
tag = PAGECACHE_TAG_DIRTY;
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
mpd->map.m_len = 0;
mpd->next_page = index;
while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
goto out;
@@ -2591,16 +2629,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct page *page = pvec.pages[i];
/*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- /*
* Accumulated enough dirty pages? This doesn't apply
* to WB_SYNC_ALL mode. For integrity sync we have to
* keep going because someone may be concurrently
@@ -3356,26 +3384,75 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-#ifdef CONFIG_FS_DAX
+static bool ext4_inode_datasync_dirty(struct inode *inode)
+{
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal)
+ return !jbd2_transaction_committed(journal,
+ EXT4_I(inode)->i_datasync_tid);
+ /* Any metadata buffers to write? */
+ if (!list_empty(&inode->i_mapping->private_list))
+ return true;
+ return inode->i_state & I_DIRTY_DATASYNC;
+}
+
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap)
{
- struct block_device *bdev;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits;
unsigned long last_block = (offset + length - 1) >> blkbits;
struct ext4_map_blocks map;
+ bool delalloc = false;
int ret;
- if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
- return -ERANGE;
+
+ if (flags & IOMAP_REPORT) {
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_inline_data_iomap(inode, iomap);
+ if (ret != -EAGAIN) {
+ if (ret == 0 && offset >= iomap->length)
+ ret = -ENOENT;
+ return ret;
+ }
+ }
+ } else {
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
+ }
map.m_lblk = first_block;
map.m_len = last_block - first_block + 1;
- if (!(flags & IOMAP_WRITE)) {
+ if (flags & IOMAP_REPORT) {
ret = ext4_map_blocks(NULL, inode, &map, 0);
- } else {
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0) {
+ ext4_lblk_t end = map.m_lblk + map.m_len - 1;
+ struct extent_status es;
+
+ ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
+
+ if (!es.es_len || es.es_lblk > end) {
+ /* entire range is a hole */
+ } else if (es.es_lblk > map.m_lblk) {
+ /* range starts with a hole */
+ map.m_len = es.es_lblk - map.m_lblk;
+ } else {
+ ext4_lblk_t offs = 0;
+
+ if (es.es_lblk < map.m_lblk)
+ offs = map.m_lblk - es.es_lblk;
+ map.m_lblk = es.es_lblk + offs;
+ map.m_len = es.es_len - offs;
+ delalloc = true;
+ }
+ }
+ } else if (flags & IOMAP_WRITE) {
int dio_credits;
handle_t *handle;
int retries = 0;
@@ -3426,21 +3503,23 @@ retry:
}
}
ext4_journal_stop(handle);
+ } else {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret < 0)
+ return ret;
}
iomap->flags = 0;
- bdev = inode->i_sb->s_bdev;
- iomap->bdev = bdev;
- if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
- else
- iomap->dax_dev = NULL;
+ if (ext4_inode_datasync_dirty(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits;
+ iomap->length = (u64)map.m_len << blkbits;
if (ret == 0) {
- iomap->type = IOMAP_HOLE;
- iomap->blkno = IOMAP_NULL_BLOCK;
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
} else {
if (map.m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
@@ -3450,12 +3529,12 @@ retry:
WARN_ON_ONCE(1);
return -EIO;
}
- iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->addr = (u64)map.m_pblk << blkbits;
}
if (map.m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
+
return 0;
}
@@ -3467,7 +3546,6 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
int blkbits = inode->i_blkbits;
bool truncate = false;
- fs_put_dax(iomap->dax_dev);
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0;
@@ -3517,8 +3595,6 @@ const struct iomap_ops ext4_iomap_ops = {
.iomap_end = ext4_iomap_end,
};
-#endif
-
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
@@ -4540,6 +4616,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
+static bool ext4_should_use_dax(struct inode *inode)
+{
+ if (!test_opt(inode->i_sb, DAX))
+ return false;
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (ext4_should_journal_data(inode))
+ return false;
+ if (ext4_has_inline_data(inode))
+ return false;
+ if (ext4_encrypted_inode(inode))
+ return false;
+ return true;
+}
+
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
@@ -4555,12 +4646,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
- !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
- !ext4_encrypted_inode(inode))
+ if (ext4_should_use_dax(inode))
new_fl |= S_DAX;
+ if (flags & EXT4_ENCRYPT_FL)
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
+ S_ENCRYPTED);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -4853,14 +4945,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
brelse(iloc.bh);
ext4_set_inode_flags(inode);
- if (ei->i_flags & EXT4_EA_INODE_FL) {
- ext4_xattr_inode_set_class(inode);
-
- inode_lock(inode);
- inode->i_flags |= S_NOQUOTA;
- inode_unlock(inode);
- }
-
unlock_new_inode(inode);
return inode;
@@ -5284,6 +5368,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
+ error = fscrypt_prepare_setattr(dentry, attr);
+ if (error)
+ return error;
+
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)
@@ -5329,14 +5417,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
- if (ext4_encrypted_inode(inode)) {
- error = fscrypt_get_encryption_info(inode);
- if (error)
- return error;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
-
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5658,22 +5738,16 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
return err;
}
-/*
- * Expand an inode by new_extra_isize bytes.
- * Returns 0 on success or negative error number on failure.
- */
-static int ext4_expand_extra_isize(struct inode *inode,
- unsigned int new_extra_isize,
- struct ext4_iloc iloc,
- handle_t *handle)
+static int __ext4_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc *iloc,
+ handle_t *handle, int *no_expand)
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
+ int error;
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
- return 0;
-
- raw_inode = ext4_raw_inode(&iloc);
+ raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
@@ -5688,8 +5762,98 @@ static int ext4_expand_extra_isize(struct inode *inode,
}
/* try to expand with EAs present */
- return ext4_expand_extra_isize_ea(inode, new_extra_isize,
- raw_inode, handle);
+ error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
+ raw_inode, handle);
+ if (error) {
+ /*
+ * Inode size expansion failed; don't try again
+ */
+ *no_expand = 1;
+ }
+
+ return error;
+}
+
+/*
+ * Expand an inode by new_extra_isize bytes.
+ * Returns 0 on success or negative error number on failure.
+ */
+static int ext4_try_to_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc iloc,
+ handle_t *handle)
+{
+ int no_expand;
+ int error;
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND))
+ return -EOVERFLOW;
+
+ /*
+ * In nojournal mode, we can immediately attempt to expand
+ * the inode. When journaled, we first need to obtain extra
+ * buffer credits since we may write into the EA block
+ * with this same handle. If journal_extend fails, then it will
+ * only result in a minor loss of functionality for that inode.
+ * If this is felt to be critical, then e2fsck should be run to
+ * force a large enough s_min_extra_isize.
+ */
+ if (ext4_handle_valid(handle) &&
+ jbd2_journal_extend(handle,
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0)
+ return -ENOSPC;
+
+ if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+ return -EBUSY;
+
+ error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
+ handle, &no_expand);
+ ext4_write_unlock_xattr(inode, &no_expand);
+
+ return error;
+}
+
+int ext4_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc *iloc)
+{
+ handle_t *handle;
+ int no_expand;
+ int error, rc;
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
+ brelse(iloc->bh);
+ return -EOVERFLOW;
+ }
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE,
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ brelse(iloc->bh);
+ return error;
+ }
+
+ ext4_write_lock_xattr(inode, &no_expand);
+
+ BUFFER_TRACE(iloc.bh, "get_write_access");
+ error = ext4_journal_get_write_access(handle, iloc->bh);
+ if (error) {
+ brelse(iloc->bh);
+ goto out_stop;
+ }
+
+ error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
+ handle, &no_expand);
+
+ rc = ext4_mark_iloc_dirty(handle, inode, iloc);
+ if (!error)
+ error = rc;
+
+ ext4_write_unlock_xattr(inode, &no_expand);
+out_stop:
+ ext4_journal_stop(handle);
+ return error;
}
/*
@@ -5709,44 +5873,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
struct ext4_iloc iloc;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- static unsigned int mnt_count;
- int err, ret;
+ int err;
might_sleep();
trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
return err;
- if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
- !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
- /*
- * In nojournal mode, we can immediately attempt to expand
- * the inode. When journaled, we first need to obtain extra
- * buffer credits since we may write into the EA block
- * with this same handle. If journal_extend fails, then it will
- * only result in a minor loss of functionality for that inode.
- * If this is felt to be critical, then e2fsck should be run to
- * force a large enough s_min_extra_isize.
- */
- if (!ext4_handle_valid(handle) ||
- jbd2_journal_extend(handle,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
- ret = ext4_expand_extra_isize(inode,
- sbi->s_want_extra_isize,
- iloc, handle);
- if (ret) {
- if (mnt_count !=
- le16_to_cpu(sbi->s_es->s_mnt_count)) {
- ext4_warning(inode->i_sb,
- "Unable to expand inode %lu. Delete"
- " some EAs or run e2fsck.",
- inode->i_ino);
- mnt_count =
- le16_to_cpu(sbi->s_es->s_mnt_count);
- }
- }
- }
- }
+
+ if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize)
+ ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize,
+ iloc, handle);
+
return ext4_mark_iloc_dirty(handle, inode, &iloc);
}
@@ -5884,11 +6022,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
- /*
- * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
- * E.g. S_DAX may get cleared / set.
- */
- ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
@@ -6024,70 +6157,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
return err;
}
-
-/*
- * Find the first extent at or after @lblk in an inode that is not a hole.
- * Search for @map_len blocks at most. The extent is returned in @result.
- *
- * The function returns 1 if we found an extent. The function returns 0 in
- * case there is no extent at or after @lblk and in that case also sets
- * @result->es_len to 0. In case of error, the error code is returned.
- */
-int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
- unsigned int map_len, struct extent_status *result)
-{
- struct ext4_map_blocks map;
- struct extent_status es = {};
- int ret;
-
- map.m_lblk = lblk;
- map.m_len = map_len;
-
- /*
- * For non-extent based files this loop may iterate several times since
- * we do not determine full hole size.
- */
- while (map.m_len > 0) {
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0)
- return ret;
- /* There's extent covering m_lblk? Just return it. */
- if (ret > 0) {
- int status;
-
- ext4_es_store_pblock(result, map.m_pblk);
- result->es_lblk = map.m_lblk;
- result->es_len = map.m_len;
- if (map.m_flags & EXT4_MAP_UNWRITTEN)
- status = EXTENT_STATUS_UNWRITTEN;
- else
- status = EXTENT_STATUS_WRITTEN;
- ext4_es_store_status(result, status);
- return 1;
- }
- ext4_es_find_delayed_extent_range(inode, map.m_lblk,
- map.m_lblk + map.m_len - 1,
- &es);
- /* Is delalloc data before next block in extent tree? */
- if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
- ext4_lblk_t offset = 0;
-
- if (es.es_lblk < lblk)
- offset = lblk - es.es_lblk;
- result->es_lblk = es.es_lblk + offset;
- ext4_es_store_pblock(result,
- ext4_es_pblock(&es) + offset);
- result->es_len = es.es_len - offset;
- ext4_es_store_status(result, ext4_es_status(&es));
-
- return 1;
- }
- /* There's a hole at m_lblk, advance us after it */
- map.m_lblk += map.m_len;
- map_len -= map.m_len;
- map.m_len = map_len;
- cond_resched();
- }
- result->es_len = 0;
- return 0;
-}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 42b3a73143cf..1eec25014f62 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/ioctl.c
*
@@ -14,6 +15,7 @@
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/quotaops.h>
+#include <linux/random.h>
#include <linux/uuid.h>
#include <linux/uaccess.h>
#include <linux/delay.h>
@@ -64,18 +66,16 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
ei1 = EXT4_I(inode1);
ei2 = EXT4_I(inode2);
- memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
- memswap(&inode1->i_version, &inode2->i_version,
- sizeof(inode1->i_version));
- memswap(&inode1->i_blocks, &inode2->i_blocks,
- sizeof(inode1->i_blocks));
- memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
- memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
- memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));
+ swap(inode1->i_flags, inode2->i_flags);
+ swap(inode1->i_version, inode2->i_version);
+ swap(inode1->i_blocks, inode2->i_blocks);
+ swap(inode1->i_bytes, inode2->i_bytes);
+ swap(inode1->i_atime, inode2->i_atime);
+ swap(inode1->i_mtime, inode2->i_mtime);
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
- memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
- memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
+ swap(ei1->i_flags, ei2->i_flags);
+ swap(ei1->i_disksize, ei2->i_disksize);
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
@@ -100,7 +100,6 @@ static long swap_inode_boot_loader(struct super_block *sb,
int err;
struct inode *inode_bl;
struct ext4_inode_info *ei_bl;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode))
return -EINVAL;
@@ -159,10 +158,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
- spin_lock(&sbi->s_next_gen_lock);
- inode->i_generation = sbi->s_next_generation++;
- inode_bl->i_generation = sbi->s_next_generation++;
- spin_unlock(&sbi->s_next_gen_lock);
+ inode->i_generation = prandom_u32();
+ inode_bl->i_generation = prandom_u32();
ext4_discard_preallocations(inode);
@@ -292,10 +289,20 @@ flags_err:
if (err)
goto flags_out;
- if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL))
+ if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) {
+ /*
+ * Changes to the journaling mode can cause unsafe changes to
+ * S_DAX if we are using the DAX mount option.
+ */
+ if (test_opt(inode->i_sb, DAX)) {
+ err = -EBUSY;
+ goto flags_out;
+ }
+
err = ext4_change_inode_journal_flag(inode, jflag);
- if (err)
- goto flags_out;
+ if (err)
+ goto flags_out;
+ }
if (migrate) {
if (flags & EXT4_EXTENTS_FL)
err = ext4_ext_migrate(inode);
@@ -351,11 +358,14 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
raw_inode = ext4_raw_inode(&iloc);
if (!EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) {
- err = -EOVERFLOW;
+ err = ext4_expand_extra_isize(inode,
+ EXT4_SB(sb)->s_want_extra_isize,
+ &iloc);
+ if (err)
+ goto out_unlock;
+ } else {
brelse(iloc.bh);
- goto out_unlock;
}
- brelse(iloc.bh);
dquot_initialize(inode);
@@ -582,6 +592,44 @@ static int ext4_ioc_getfsmap(struct super_block *sb,
return 0;
}
+static long ext4_ioctl_group_add(struct file *file,
+ struct ext4_new_group_data *input)
+{
+ struct super_block *sb = file_inode(file)->i_sb;
+ int err, err2=0;
+
+ err = ext4_resize_begin(sb);
+ if (err)
+ return err;
+
+ if (ext4_has_feature_bigalloc(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online resizing not supported with bigalloc");
+ err = -EOPNOTSUPP;
+ goto group_add_out;
+ }
+
+ err = mnt_want_write_file(file);
+ if (err)
+ goto group_add_out;
+
+ err = ext4_group_add(sb, input);
+ if (EXT4_SB(sb)->s_journal) {
+ jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+ err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+ jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+ }
+ if (err == 0)
+ err = err2;
+ mnt_drop_write_file(file);
+ if (!err && ext4_has_group_desc_csum(sb) &&
+ test_opt(sb, INIT_INODE_TABLE))
+ err = ext4_register_li_request(sb, input->group);
+group_add_out:
+ ext4_resize_end(sb);
+ return err;
+}
+
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
@@ -766,44 +814,12 @@ mext_out:
case EXT4_IOC_GROUP_ADD: {
struct ext4_new_group_data input;
- int err, err2=0;
-
- err = ext4_resize_begin(sb);
- if (err)
- return err;
if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
- sizeof(input))) {
- err = -EFAULT;
- goto group_add_out;
- }
-
- if (ext4_has_feature_bigalloc(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not supported with bigalloc");
- err = -EOPNOTSUPP;
- goto group_add_out;
- }
-
- err = mnt_want_write_file(filp);
- if (err)
- goto group_add_out;
+ sizeof(input)))
+ return -EFAULT;
- err = ext4_group_add(sb, &input);
- if (EXT4_SB(sb)->s_journal) {
- jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
- err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
- jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
- }
- if (err == 0)
- err = err2;
- mnt_drop_write_file(filp);
- if (!err && ext4_has_group_desc_csum(sb) &&
- test_opt(sb, INIT_INODE_TABLE))
- err = ext4_register_li_request(sb, input.group);
-group_add_out:
- ext4_resize_end(sb);
- return err;
+ return ext4_ioctl_group_add(filp, &input);
}
case EXT4_IOC_MIGRATE:
@@ -860,12 +876,6 @@ group_add_out:
int err = 0, err2 = 0;
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
- if (ext4_has_feature_bigalloc(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Online resizing not (yet) supported with bigalloc");
- return -EOPNOTSUPP;
- }
-
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) {
return -EFAULT;
@@ -1074,8 +1084,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
break;
case EXT4_IOC32_GROUP_ADD: {
struct compat_ext4_new_group_input __user *uinput;
- struct ext4_new_group_input input;
- mm_segment_t old_fs;
+ struct ext4_new_group_data input;
int err;
uinput = compat_ptr(arg);
@@ -1088,12 +1097,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
&uinput->reserved_blocks);
if (err)
return -EFAULT;
- old_fs = get_fs();
- set_fs(KERNEL_DS);
- err = ext4_ioctl(file, EXT4_IOC_GROUP_ADD,
- (unsigned long) &input);
- set_fs(old_fs);
- return err;
+ return ext4_ioctl_group_add(file, &input);
}
case EXT4_IOC_MOVE_EXT:
case EXT4_IOC_RESIZE_FS:
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 581e357e8406..d9f8b90a93ed 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2295,6 +2295,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
int err, buddy_loaded = 0;
struct ext4_buddy e4b;
struct ext4_group_info *grinfo;
+ unsigned char blocksize_bits = min_t(unsigned char,
+ sb->s_blocksize_bits,
+ EXT4_MAX_BLOCK_LOG_SIZE);
struct sg {
struct ext4_group_info info;
ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
@@ -2306,8 +2309,9 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
" 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
- i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
+ i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);
+
grinfo = ext4_get_group_info(sb, group);
/* Load the group info in memory only if not already loaded. */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grinfo))) {
@@ -2327,7 +2331,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
sg.info.bb_fragments, sg.info.bb_first_free);
for (i = 0; i <= 13; i++)
- seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
+ seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
sg.info.bb_counters[i] : 0);
seq_printf(seq, " ]\n");
@@ -2892,8 +2896,10 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
break;
}
- if (discard_bio)
+ if (discard_bio) {
submit_bio_wait(discard_bio);
+ bio_put(discard_bio);
+ }
}
list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
@@ -4988,8 +4994,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_buddy e4b;
- int err = 0, ret, blk_free_count;
- ext4_grpblk_t blocks_freed;
+ int err = 0, ret, free_clusters_count;
+ ext4_grpblk_t clusters_freed;
+ ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
+ ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
+ unsigned long cluster_count = last_cluster - first_cluster + 1;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
@@ -5001,8 +5010,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* Check to see if we are freeing blocks across a group
* boundary.
*/
- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
- ext4_warning(sb, "too much blocks added to group %u",
+ if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
+ ext4_warning(sb, "too many blocks added to group %u",
block_group);
err = -EINVAL;
goto error_return;
@@ -5048,14 +5057,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (err)
goto error_return;
- for (i = 0, blocks_freed = 0; i < count; i++) {
+ for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit");
if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
ext4_error(sb, "bit already cleared for block %llu",
(ext4_fsblk_t)(block + i));
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
- blocks_freed++;
+ clusters_freed++;
}
}
@@ -5069,19 +5078,20 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* them with group lock_held
*/
ext4_lock_group(sb, block_group);
- mb_clear_bits(bitmap_bh->b_data, bit, count);
- mb_free_blocks(NULL, &e4b, bit, count);
- blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
- ext4_free_group_clusters_set(sb, desc, blk_free_count);
+ mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
+ mb_free_blocks(NULL, &e4b, bit, cluster_count);
+ free_clusters_count = clusters_freed +
+ ext4_free_group_clusters(sb, desc);
+ ext4_free_group_clusters_set(sb, desc, free_clusters_count);
ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter,
- EXT4_NUM_B2C(sbi, blocks_freed));
+ clusters_freed);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
- atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
+ atomic64_add(clusters_freed,
&sbi->s_flex_groups[flex_group].free_clusters);
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 009300ee1561..dcf52540f379 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* fs/ext4/mballoc.h
*
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index eb9835638680..27b9a76a0dfa 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/fs.h>
#include <linux/random.h>
#include <linux/buffer_head.h>
@@ -185,7 +186,7 @@ static int kmmpd(void *data)
goto exit_thread;
}
- if (sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(sb)) {
ext4_warning(sb, "kmmpd being stopped since filesystem "
"has been remounted as readonly.");
goto exit_thread;
@@ -367,7 +368,7 @@ skip:
goto failed;
}
- mmpd_data = kmalloc(sizeof(struct mmpd_data), GFP_KERNEL);
+ mmpd_data = kmalloc(sizeof(*mmpd_data), GFP_KERNEL);
if (!mmpd_data) {
ext4_warning(sb, "not enough memory for mmpd_data");
goto failed;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 13f0cadb1238..798b3ac680db 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/namei.c
*
@@ -1342,13 +1343,12 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
struct super_block *sb;
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
- ext4_lblk_t start, block, b;
+ ext4_lblk_t start, block;
const u8 *name = d_name->name;
- int ra_max = 0; /* Number of bh's in the readahead
+ size_t ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
- int ra_ptr = 0; /* Current index into readahead
+ size_t ra_ptr = 0; /* Current index into readahead
buffer */
- int num = 0;
ext4_lblk_t nblocks;
int i, namelen, retval;
struct ext4_filename fname;
@@ -1411,31 +1411,17 @@ restart:
if (ra_ptr >= ra_max) {
/* Refill the readahead buffer */
ra_ptr = 0;
- b = block;
- for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
- /*
- * Terminate if we reach the end of the
- * directory and must wrap, or if our
- * search has finished at this block.
- */
- if (b >= nblocks || (num && block == start)) {
- bh_use[ra_max] = NULL;
- break;
- }
- num++;
- bh = ext4_getblk(NULL, dir, b++, 0);
- if (IS_ERR(bh)) {
- if (ra_max == 0) {
- ret = bh;
- goto cleanup_and_exit;
- }
- break;
- }
- bh_use[ra_max] = bh;
- if (bh)
- ll_rw_block(REQ_OP_READ,
- REQ_META | REQ_PRIO,
- 1, &bh);
+ if (block < start)
+ ra_max = start - block;
+ else
+ ra_max = nblocks - block;
+ ra_max = min(ra_max, ARRAY_SIZE(bh_use));
+ retval = ext4_bread_batch(dir, block, ra_max,
+ false /* wait */, bh_use);
+ if (retval) {
+ ret = ERR_PTR(retval);
+ ra_max = 0;
+ goto cleanup_and_exit;
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1553,24 +1539,14 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
struct inode *inode;
struct ext4_dir_entry_2 *de;
struct buffer_head *bh;
+ int err;
- if (ext4_encrypted_inode(dir)) {
- int res = fscrypt_get_encryption_info(dir);
-
- /*
- * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
- * created while the directory was encrypted and we
- * have access to the key.
- */
- if (fscrypt_has_encryption_key(dir))
- fscrypt_set_encrypted_dentry(dentry);
- fscrypt_set_d_op(dentry);
- if (res && res != -ENOKEY)
- return ERR_PTR(res);
- }
+ err = fscrypt_prepare_lookup(dir, dentry, flags);
+ if (err)
+ return ERR_PTR(err);
- if (dentry->d_name.len > EXT4_NAME_LEN)
- return ERR_PTR(-ENAMETOOLONG);
+ if (dentry->d_name.len > EXT4_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
if (IS_ERR(bh))
@@ -2395,19 +2371,22 @@ out:
}
/*
- * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
- * since this indicates that nlinks count was previously 1.
+ * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2
+ * since this indicates that nlinks count was previously 1 to avoid overflowing
+ * the 16-bit i_links_count field on disk. Directories with i_nlink == 1 mean
+ * that subdirectory link counts are not being maintained accurately.
+ *
+ * The caller has already checked for i_nlink overflow in case the DIR_LINK
+ * feature is not enabled and returned -EMLINK. The is_dx() check is a proxy
+ * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
+ * on regular files) and to avoid creating huge/slow non-HTREE directories.
*/
static void ext4_inc_count(handle_t *handle, struct inode *inode)
{
inc_nlink(inode);
- if (is_dx(inode) && inode->i_nlink > 1) {
- /* limit is 16-bit i_links_count */
- if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
- set_nlink(inode, 1);
- ext4_set_feature_dir_nlink(inode->i_sb);
- }
- }
+ if (is_dx(inode) &&
+ (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2))
+ set_nlink(inode, 1);
}
/*
@@ -3233,9 +3212,10 @@ static int ext4_link(struct dentry *old_dentry,
if (inode->i_nlink >= EXT4_LINK_MAX)
return -EMLINK;
- if (ext4_encrypted_inode(dir) &&
- !fscrypt_has_permitted_context(dir, inode))
- return -EPERM;
+
+ err = fscrypt_prepare_link(old_dentry, dir, dentry);
+ if (err)
+ return err;
if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
(!projid_eq(EXT4_I(dir)->i_projid,
@@ -3527,12 +3507,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
EXT4_I(old_dentry->d_inode)->i_projid)))
return -EXDEV;
- if ((ext4_encrypted_inode(old_dir) &&
- !fscrypt_has_encryption_key(old_dir)) ||
- (ext4_encrypted_inode(new_dir) &&
- !fscrypt_has_encryption_key(new_dir)))
- return -ENOKEY;
-
retval = dquot_initialize(old.dir);
if (retval)
return retval;
@@ -3561,13 +3535,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
goto end_rename;
- if ((old.dir != new.dir) &&
- ext4_encrypted_inode(new.dir) &&
- !fscrypt_has_permitted_context(new.dir, old.inode)) {
- retval = -EPERM;
- goto end_rename;
- }
-
new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
&new.de, &new.inlined);
if (IS_ERR(new.bh)) {
@@ -3733,19 +3700,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int retval;
struct timespec ctime;
- if ((ext4_encrypted_inode(old_dir) &&
- !fscrypt_has_encryption_key(old_dir)) ||
- (ext4_encrypted_inode(new_dir) &&
- !fscrypt_has_encryption_key(new_dir)))
- return -ENOKEY;
-
- if ((ext4_encrypted_inode(old_dir) ||
- ext4_encrypted_inode(new_dir)) &&
- (old_dir != new_dir) &&
- (!fscrypt_has_permitted_context(new_dir, old.inode) ||
- !fscrypt_has_permitted_context(old_dir, new.inode)))
- return -EPERM;
-
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(new_dir)->i_projid,
EXT4_I(old_dentry->d_inode)->i_projid)) ||
@@ -3872,12 +3826,19 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry,
unsigned int flags)
{
+ int err;
+
if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
return -EIO;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
+ err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+ if (err)
+ return err;
+
if (flags & RENAME_EXCHANGE) {
return ext4_cross_rename(old_dir, old_dentry,
new_dir, new_dentry);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c2fce4478cca..db7590178dfc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/page-io.c
*
@@ -300,7 +301,7 @@ static void ext4_end_bio(struct bio *bio)
char b[BDEVNAME_SIZE];
if (WARN_ONCE(!io_end, "io_end is NULL: %s: sector %Lu len %u err %d\n",
- bdevname(bio->bi_bdev, b),
+ bio_devname(bio, b),
(long long) bio->bi_iter.bi_sector,
(unsigned) bio_sectors(bio),
bio->bi_status)) {
@@ -375,7 +376,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
+ bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 40a5497b0f60..9ffa6fad18db 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/readpage.c
*
@@ -254,7 +255,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
fscrypt_release_ctx(ctx);
goto set_error_page;
}
- bio->bi_bdev = bdev;
+ bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
bio->bi_end_io = mpage_end_io;
bio->bi_private = ctx;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index c3ed9021b781..50443bda8e98 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/resize.c
*
@@ -106,7 +107,7 @@ static int verify_group_input(struct super_block *sb,
overhead = ext4_group_overhead_blocks(sb, group);
metaend = start + overhead;
- input->free_blocks_count = free_blocks_count =
+ input->free_clusters_count = free_blocks_count =
input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
if (test_opt(sb, DEBUG))
@@ -257,6 +258,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
ext4_group_t last_group;
unsigned overhead;
__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
+ int i;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
@@ -293,7 +295,7 @@ next_group:
group_data[bb_index].block_bitmap = start_blk++;
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
- group_data[group].free_blocks_count--;
+ group_data[group].mdata_blocks++;
flex_gd->bg_flags[group] &= uninit_mask;
}
@@ -304,7 +306,7 @@ next_group:
group_data[ib_index].inode_bitmap = start_blk++;
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
- group_data[group].free_blocks_count--;
+ group_data[group].mdata_blocks++;
flex_gd->bg_flags[group] &= uninit_mask;
}
@@ -323,15 +325,22 @@ next_group:
if (start_blk + itb > next_group_start) {
flex_gd->bg_flags[group + 1] &= uninit_mask;
overhead = start_blk + itb - next_group_start;
- group_data[group + 1].free_blocks_count -= overhead;
+ group_data[group + 1].mdata_blocks += overhead;
itb -= overhead;
}
- group_data[group].free_blocks_count -= itb;
+ group_data[group].mdata_blocks += itb;
flex_gd->bg_flags[group] &= uninit_mask;
start_blk += EXT4_SB(sb)->s_itb_per_group;
}
+ /* Update free clusters count to exclude metadata blocks */
+ for (i = 0; i < flex_gd->count; i++) {
+ group_data[i].free_clusters_count -=
+ EXT4_NUM_B2C(EXT4_SB(sb),
+ group_data[i].mdata_blocks);
+ }
+
if (test_opt(sb, DEBUG)) {
int i;
group = group_data[0].group;
@@ -341,12 +350,13 @@ next_group:
flexbg_size);
for (i = 0; i < flex_gd->count; i++) {
- printk(KERN_DEBUG "adding %s group %u: %u "
- "blocks (%d free)\n",
+ ext4_debug(
+ "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
- group_data[i].free_blocks_count);
+ group_data[i].free_clusters_count,
+ group_data[i].mdata_blocks);
}
}
return 0;
@@ -398,7 +408,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
}
/*
- * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
+ * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used.
*
* Helper function for ext4_setup_new_group_blocks() which set .
*
@@ -408,22 +418,26 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
*/
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t block, ext4_group_t count)
+ ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster)
{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_group_t count = last_cluster - first_cluster + 1;
ext4_group_t count2;
- ext4_debug("mark blocks [%llu/%u] used\n", block, count);
- for (count2 = count; count > 0; count -= count2, block += count2) {
+ ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster,
+ last_cluster);
+ for (count2 = count; count > 0;
+ count -= count2, first_cluster += count2) {
ext4_fsblk_t start;
struct buffer_head *bh;
ext4_group_t group;
int err;
- group = ext4_get_group_number(sb, block);
- start = ext4_group_first_block_no(sb, group);
+ group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster));
+ start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group));
group -= flex_gd->groups[0].group;
- count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start);
+ count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start);
if (count2 > count)
count2 = count;
@@ -444,9 +458,9 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
err = ext4_journal_get_write_access(handle, bh);
if (err)
return err;
- ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
- block - start, count2);
- ext4_set_bits(bh->b_data, block - start, count2);
+ ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
+ first_cluster, first_cluster - start, count2);
+ ext4_set_bits(bh->b_data, first_cluster - start, count2);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (unlikely(err))
@@ -595,9 +609,10 @@ handle_bb:
if (overhead != 0) {
ext4_debug("mark backup superblock %#04llx (+0)\n",
start);
- ext4_set_bits(bh->b_data, 0, overhead);
+ ext4_set_bits(bh->b_data, 0,
+ EXT4_NUM_B2C(sbi, overhead));
}
- ext4_mark_bitmap_end(group_data[i].blocks_count,
+ ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
sb->s_blocksize * 8, bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err)
@@ -642,7 +657,11 @@ handle_ib:
continue;
}
err = set_flexbg_block_bitmap(sb, handle,
- flex_gd, start, count);
+ flex_gd,
+ EXT4_B2C(sbi, start),
+ EXT4_B2C(sbi,
+ start + count
+ - 1));
if (err)
goto out;
count = group_table_count[j];
@@ -652,7 +671,11 @@ handle_ib:
if (count) {
err = set_flexbg_block_bitmap(sb, handle,
- flex_gd, start, count);
+ flex_gd,
+ EXT4_B2C(sbi, start),
+ EXT4_B2C(sbi,
+ start + count
+ - 1));
if (err)
goto out;
}
@@ -840,7 +863,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_std_error(sb, err);
goto exit_inode;
}
- inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9;
+ inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
+ (9 - EXT4_SB(sb)->s_cluster_bits);
ext4_mark_iloc_dirty(handle, inode, &iloc);
memset(gdb_bh->b_data, 0, sb->s_blocksize);
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
@@ -935,6 +959,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
{
struct super_block *sb = inode->i_sb;
int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
+ int cluster_bits = EXT4_SB(sb)->s_cluster_bits;
struct buffer_head **primary;
struct buffer_head *dind;
struct ext4_iloc iloc;
@@ -1010,7 +1035,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
if (!err)
err = err2;
}
- inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
+
+ inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits);
ext4_mark_iloc_dirty(handle, inode, &iloc);
exit_bh:
@@ -1244,7 +1270,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_group_t group;
__u16 *bg_flags = flex_gd->bg_flags;
int i, gdb_off, gdb_num, err = 0;
-
+
for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
group = group_data->group;
@@ -1271,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_inode_table_set(sb, gdp, group_data->inode_table);
ext4_free_group_clusters_set(sb, gdp,
- EXT4_NUM_B2C(sbi, group_data->free_blocks_count));
+ group_data->free_clusters_count);
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
if (ext4_has_group_desc_csum(sb))
ext4_itable_unused_set(sb, gdp,
@@ -1327,7 +1353,7 @@ static void ext4_update_super(struct super_block *sb,
*/
for (i = 0; i < flex_gd->count; i++) {
blocks_count += group_data[i].blocks_count;
- free_blocks += group_data[i].free_blocks_count;
+ free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count);
}
reserved_blocks = ext4_r_blocks_count(es) * 100;
@@ -1499,17 +1525,18 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
ext4_fsblk_t n_blocks_count,
unsigned long flexbg_size)
{
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_super_block *es = sbi->s_es;
struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t o_blocks_count;
ext4_group_t n_group;
ext4_group_t group;
ext4_group_t last_group;
ext4_grpblk_t last;
- ext4_grpblk_t blocks_per_group;
+ ext4_grpblk_t clusters_per_group;
unsigned long i;
- blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
+ clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb);
o_blocks_count = ext4_blocks_count(es);
@@ -1530,9 +1557,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
int overhead;
group_data[i].group = group + i;
- group_data[i].blocks_count = blocks_per_group;
+ group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb);
overhead = ext4_group_overhead_blocks(sb, group + i);
- group_data[i].free_blocks_count = blocks_per_group - overhead;
+ group_data[i].mdata_blocks = overhead;
+ group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb);
if (ext4_has_group_desc_csum(sb)) {
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
EXT4_BG_INODE_UNINIT;
@@ -1546,10 +1574,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
/* We need to initialize block bitmap of last group. */
flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
- if ((last_group == n_group) && (last != blocks_per_group - 1)) {
- group_data[i - 1].blocks_count = last + 1;
- group_data[i - 1].free_blocks_count -= blocks_per_group-
- last - 1;
+ if ((last_group == n_group) && (last != clusters_per_group - 1)) {
+ group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1);
+ group_data[i - 1].free_clusters_count -= clusters_per_group -
+ last - 1;
}
return 1;
@@ -1796,7 +1824,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
}
/* Do a quick sanity check of the resize inode */
- if (inode->i_blocks != 1 << (inode->i_blkbits - 9))
+ if (inode->i_blocks != 1 << (inode->i_blkbits -
+ (9 - sbi->s_cluster_bits)))
goto invalid_resize_inode;
for (i = 0; i < EXT4_N_BLOCKS; i++) {
if (i == EXT4_DIND_BLOCK) {
@@ -1927,7 +1956,8 @@ retry:
n_desc_blocks = o_desc_blocks +
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
- n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb);
+ n_blocks_count = (ext4_fsblk_t)n_group *
+ EXT4_BLOCKS_PER_GROUP(sb);
n_group--; /* set to last group number */
}
@@ -1958,7 +1988,7 @@ retry:
if (n_group == o_group)
add = n_blocks_count - o_blocks_count;
else
- add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1);
+ add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1));
if (add > 0) {
err = ext4_group_extend_no_check(sb, o_blocks_count, add);
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0886fe82e9c4..0556cd036b69 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -405,7 +405,7 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
static void ext4_handle_error(struct super_block *sb)
{
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return;
if (!test_opt(sb, ERRORS_CONT)) {
@@ -587,8 +587,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
/* Special case: if the error is EROFS, and we're not already
* inside a transaction, then there's really no point in logging
* an error. */
- if (errno == -EROFS && journal_current_handle() == NULL &&
- (sb->s_flags & MS_RDONLY))
+ if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
return;
if (ext4_error_ratelimit(sb)) {
@@ -628,7 +627,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
sb->s_id, function, line, &vaf);
va_end(args);
- if ((sb->s_flags & MS_RDONLY) == 0) {
+ if (sb_rdonly(sb) == 0) {
ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
/*
@@ -889,11 +888,11 @@ static void ext4_put_super(struct super_block *sb)
ext4_mb_release(sb);
ext4_ext_release(sb);
- if (!(sb->s_flags & MS_RDONLY) && !aborted) {
+ if (!sb_rdonly(sb) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
- if (!(sb->s_flags & MS_RDONLY))
+ if (!sb_rdonly(sb))
ext4_commit_super(sb, 1);
for (i = 0; i < sbi->s_gdb_count; i++)
@@ -951,6 +950,7 @@ static void ext4_put_super(struct super_block *sb)
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
+ fs_put_dax(sbi->s_daxdev);
kfree(sbi);
}
@@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_es_shk_nr = 0;
ei->i_es_shrink_lblk = 0;
ei->i_reserved_data_blocks = 0;
- ei->i_reserved_meta_blocks = 0;
- ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
@@ -1161,6 +1159,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (inode->i_ino == EXT4_ROOT_INO)
return -EPERM;
+ if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
+ return -EINVAL;
+
res = ext4_convert_inline_data(inode);
if (res)
return res;
@@ -1183,7 +1184,8 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
ext4_clear_inode_state(inode,
EXT4_STATE_MAY_INLINE_DATA);
/*
- * Update inode->i_flags - e.g. S_DAX may get disabled
+ * Update inode->i_flags - S_ENCRYPTED will be enabled,
+ * S_DAX may be disabled
*/
ext4_set_inode_flags(inode);
}
@@ -1208,7 +1210,10 @@ retry:
ctx, len, 0);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
- /* Update inode->i_flags - e.g. S_DAX may get disabled */
+ /*
+ * Update inode->i_flags - S_ENCRYPTED will be enabled,
+ * S_DAX may be disabled
+ */
ext4_set_inode_flags(inode);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
@@ -1239,14 +1244,9 @@ static const struct fscrypt_operations ext4_cryptops = {
.get_context = ext4_get_context,
.set_context = ext4_set_context,
.dummy_context = ext4_dummy_context,
- .is_encrypted = ext4_encrypted_inode,
.empty_dir = ext4_empty_dir,
.max_namelen = ext4_max_namelen,
};
-#else
-static const struct fscrypt_operations ext4_cryptops = {
- .is_encrypted = ext4_encrypted_inode,
-};
#endif
#ifdef CONFIG_QUOTA
@@ -1679,7 +1679,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
sbi->s_mount_flags |= EXT4_MF_FS_ABORTED;
return 1;
case Opt_i_version:
- sb->s_flags |= MS_I_VERSION;
+ sb->s_flags |= SB_I_VERSION;
return 1;
case Opt_lazytime:
sb->s_flags |= MS_LAZYTIME;
@@ -2062,7 +2062,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
- if (sb->s_flags & MS_I_VERSION)
+ if (sb->s_flags & SB_I_VERSION)
SEQ_OPTS_PUTS("i_version");
if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
@@ -2101,7 +2101,7 @@ int ext4_seq_options_show(struct seq_file *seq, void *offset)
struct super_block *sb = seq->private;
int rc;
- seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw");
+ seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
rc = _ext4_show_options(seq, sb, 1);
seq_puts(seq, "\n");
return rc;
@@ -2369,7 +2369,7 @@ static int ext4_check_descriptors(struct super_block *sb,
"Checksum for group %u failed (%u!=%u)",
i, le16_to_cpu(ext4_group_desc_csum(sb, i,
gdp)), le16_to_cpu(gdp->bg_checksum));
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!sb_rdonly(sb)) {
ext4_unlock_group(sb, i);
return 0;
}
@@ -2406,6 +2406,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
unsigned int s_flags = sb->s_flags;
int ret, nr_orphans = 0, nr_truncates = 0;
#ifdef CONFIG_QUOTA
+ int quota_update = 0;
int i;
#endif
if (!es->s_last_orphan) {
@@ -2444,14 +2445,32 @@ static void ext4_orphan_cleanup(struct super_block *sb,
#ifdef CONFIG_QUOTA
/* Needed for iput() to work correctly and not trash data */
sb->s_flags |= MS_ACTIVE;
- /* Turn on quotas so that they are updated correctly */
+
+ /*
+ * Turn on quotas which were not enabled for read-only mounts if
+ * filesystem has quota feature, so that they are updated correctly.
+ */
+ if (ext4_has_feature_quota(sb) && (s_flags & MS_RDONLY)) {
+ int ret = ext4_enable_quotas(sb);
+
+ if (!ret)
+ quota_update = 1;
+ else
+ ext4_msg(sb, KERN_ERR,
+ "Cannot turn on quotas: error %d", ret);
+ }
+
+ /* Turn on journaled quotas used for old sytle */
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
if (EXT4_SB(sb)->s_qf_names[i]) {
int ret = ext4_quota_on_mount(sb, i);
- if (ret < 0)
+
+ if (!ret)
+ quota_update = 1;
+ else
ext4_msg(sb, KERN_ERR,
"Cannot turn on journaled "
- "quota: error %d", ret);
+ "quota: type %d: error %d", i, ret);
}
}
#endif
@@ -2512,10 +2531,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
PLURAL(nr_truncates));
#ifdef CONFIG_QUOTA
- /* Turn quotas off */
- for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- if (sb_dqopt(sb)->files[i])
- dquot_quota_off(sb, i);
+ /* Turn off quotas if they were enabled for orphan cleanup */
+ if (quota_update) {
+ for (i = 0; i < EXT4_MAXQUOTAS; i++) {
+ if (sb_dqopt(sb)->files[i])
+ dquot_quota_off(sb, i);
+ }
}
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
@@ -2772,14 +2793,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
* This function is called once a day if we have errors logged
* on the file system
*/
-static void print_daily_error_info(unsigned long arg)
+static void print_daily_error_info(struct timer_list *t)
{
- struct super_block *sb = (struct super_block *) arg;
- struct ext4_sb_info *sbi;
- struct ext4_super_block *es;
-
- sbi = EXT4_SB(sb);
- es = sbi->s_es;
+ struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
+ struct super_block *sb = sbi->s_sb;
+ struct ext4_super_block *es = sbi->s_es;
if (es->s_error_count)
/* fsck newer than v1.41.13 is needed to clean this condition. */
@@ -3116,8 +3134,7 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (first_not_zeroed == ngroups ||
- (sb->s_flags & MS_RDONLY) ||
+ if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
!test_opt(sb, INIT_INODE_TABLE))
goto out;
@@ -3379,6 +3396,7 @@ static void ext4_set_resv_clusters(struct super_block *sb)
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
+ struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct ext4_super_block *es = NULL;
@@ -3404,6 +3422,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if ((data && !orig_data) || !sbi)
goto out_free_base;
+ sbi->s_daxdev = dax_dev;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
if (!sbi->s_blockgroup_lock)
@@ -3661,7 +3680,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* previously didn't change the revision level when setting the flags,
* so there is a chance incompat flags are set on a rev 0 filesystem.
*/
- if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
+ if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
goto failed_mount;
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
@@ -3688,6 +3707,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (ext4_has_feature_inline_data(sb)) {
+ ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
+ " that may contain inline data");
+ goto failed_mount;
+ }
err = bdev_dax_supported(sb, blocksize);
if (err)
goto failed_mount;
@@ -3790,12 +3814,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_hash_unsigned = 3;
else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
- if (!(sb->s_flags & MS_RDONLY))
+ if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
sbi->s_hash_unsigned = 3;
#else
- if (!(sb->s_flags & MS_RDONLY))
+ if (!sb_rdonly(sb))
es->s_flags |=
cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
@@ -3957,11 +3981,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
sbi->s_gdb_count = db_count;
- get_random_bytes(&sbi->s_next_generation, sizeof(u32));
- spin_lock_init(&sbi->s_next_gen_lock);
- setup_timer(&sbi->s_err_report, print_daily_error_info,
- (unsigned long) sb);
+ timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
/* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi))
@@ -3976,7 +3997,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &ext4_sops;
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
sb->s_cop = &ext4_cryptops;
+#endif
#ifdef CONFIG_QUOTA
sb->dq_op = &ext4_quota_operations;
if (ext4_has_feature_quota(sb))
@@ -3995,7 +4018,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
needs_recovery = (es->s_last_orphan != 0 ||
ext4_has_feature_journal_needs_recovery(sb));
- if (ext4_has_feature_mmp(sb) && !(sb->s_flags & MS_RDONLY))
+ if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
goto failed_mount3a;
@@ -4007,7 +4030,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
err = ext4_load_journal(sb, es, journal_devnum);
if (err)
goto failed_mount3a;
- } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
+ } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
@@ -4121,7 +4144,7 @@ no_journal:
goto failed_mount_wq;
}
- if (DUMMY_ENCRYPTION_ENABLED(sbi) && !(sb->s_flags & MS_RDONLY) &&
+ if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) &&
!ext4_has_feature_encrypt(sb)) {
ext4_set_feature_encrypt(sb);
ext4_commit_super(sb, 1);
@@ -4175,7 +4198,7 @@ no_journal:
goto failed_mount4;
}
- if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+ if (ext4_setup_super(sb, es, sb_rdonly(sb)))
sb->s_flags |= MS_RDONLY;
/* determine the minimum size of new large inodes, if present */
@@ -4263,7 +4286,7 @@ no_journal:
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
- if (ext4_has_feature_quota(sb) && !(sb->s_flags & MS_RDONLY)) {
+ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount8;
@@ -4380,6 +4403,7 @@ out_fail:
out_free_base:
kfree(sbi);
kfree(orig_data);
+ fs_put_dax(dax_dev);
return err ? err : ret;
}
@@ -4586,12 +4610,13 @@ static int ext4_load_journal(struct super_block *sb,
* can get read-write access to the device.
*/
if (ext4_has_feature_journal_needs_recovery(sb)) {
- if (sb->s_flags & MS_RDONLY) {
+ if (sb_rdonly(sb)) {
ext4_msg(sb, KERN_INFO, "INFO: recovery "
"required on readonly filesystem");
if (really_read_only) {
ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, cannot proceed");
+ "unavailable, cannot proceed "
+ "(try mounting with noload)");
return -EROFS;
}
ext4_msg(sb, KERN_INFO, "write access will "
@@ -4741,8 +4766,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
if (jbd2_journal_flush(journal) < 0)
goto out;
- if (ext4_has_feature_journal_needs_recovery(sb) &&
- sb->s_flags & MS_RDONLY) {
+ if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
ext4_clear_feature_journal_needs_recovery(sb);
ext4_commit_super(sb, 1);
}
@@ -4798,7 +4822,7 @@ int ext4_force_commit(struct super_block *sb)
{
journal_t *journal;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 0;
journal = EXT4_SB(sb)->s_journal;
@@ -4863,7 +4887,7 @@ static int ext4_freeze(struct super_block *sb)
int error = 0;
journal_t *journal;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 0;
journal = EXT4_SB(sb)->s_journal;
@@ -4898,7 +4922,7 @@ out:
*/
static int ext4_unfreeze(struct super_block *sb)
{
- if ((sb->s_flags & MS_RDONLY) || ext4_forced_shutdown(EXT4_SB(sb)))
+ if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
return 0;
if (EXT4_SB(sb)->s_journal) {
@@ -5036,7 +5060,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (*flags & MS_LAZYTIME)
sb->s_flags |= MS_LAZYTIME;
- if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
+ if ((bool)(*flags & MS_RDONLY) != sb_rdonly(sb)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
goto restore_opts;
@@ -5131,7 +5155,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* Reinitialize lazy itable initialization thread based on
* current settings
*/
- if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
+ if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
ext4_unregister_li_request(sb);
else {
ext4_group_t first_not_zeroed;
@@ -5196,7 +5220,7 @@ static int ext4_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid);
if (IS_ERR(dquot))
return PTR_ERR(dquot);
- spin_lock(&dq_data_lock);
+ spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit :
@@ -5219,7 +5243,7 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
}
- spin_unlock(&dq_data_lock);
+ spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot);
return 0;
}
@@ -5265,18 +5289,13 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-/* Helper function for writing quotas on sync - we need to start transaction
- * before quota file is locked for write. Otherwise the are possible deadlocks:
- * Process 1 Process 2
- * ext4_create() quota_sync()
- * jbd2_journal_start() write_dquot()
- * dquot_initialize() down(dqio_mutex)
- * down(dqio_mutex) jbd2_journal_start()
- *
- */
#ifdef CONFIG_QUOTA
+/*
+ * Helper functions so that transaction is started before we acquire dqio_sem
+ * to keep correct lock ordering of transaction > dqio_sem
+ */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
@@ -5411,6 +5430,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
ext4_msg(sb, KERN_WARNING,
"Quota file not on filesystem root. "
"Journaled quota will not work");
+ sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
+ } else {
+ /*
+ * Clear the flag just in case mount options changed since
+ * last time.
+ */
+ sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
}
/*
@@ -5507,13 +5533,16 @@ static int ext4_enable_quotas(struct super_block *sb)
test_opt(sb, PRJQUOTA),
};
- sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
for (type = 0; type < EXT4_MAXQUOTAS; type++) {
if (qf_inums[type]) {
err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
DQUOT_USAGE_ENABLED |
(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
if (err) {
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+
ext4_warning(sb,
"Failed to enable quota tracking "
"(type=%d, err=%d). Please run "
@@ -5703,7 +5732,7 @@ static inline int ext2_feature_set_ok(struct super_block *sb)
{
if (ext4_has_unknown_ext2_incompat_features(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
if (ext4_has_unknown_ext2_ro_compat_features(sb))
return 0;
@@ -5734,7 +5763,7 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
return 0;
if (!ext4_has_feature_journal(sb))
return 0;
- if (sb->s_flags & MS_RDONLY)
+ if (sb_rdonly(sb))
return 1;
if (ext4_has_unknown_ext3_ro_compat_features(sb))
return 0;
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 5c8fc53cb0e5..a2006c9af1d9 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/symlink.c
*
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 48c7a7d55ed3..e21afd52e7d7 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/sysfs.c
*
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index c70d06a383e2..b64a9fa0ff41 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* linux/fs/ext4/truncate.h
*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index cff4f41ced61..218a7ba57819 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/xattr.c
*
@@ -317,32 +318,47 @@ static void ext4_xattr_inode_set_hash(struct inode *ea_inode, u32 hash)
*/
static int ext4_xattr_inode_read(struct inode *ea_inode, void *buf, size_t size)
{
- unsigned long block = 0;
- struct buffer_head *bh;
- int blocksize = ea_inode->i_sb->s_blocksize;
- size_t csize, copied = 0;
- void *copy_pos = buf;
+ int blocksize = 1 << ea_inode->i_blkbits;
+ int bh_count = (size + blocksize - 1) >> ea_inode->i_blkbits;
+ int tail_size = (size % blocksize) ?: blocksize;
+ struct buffer_head *bhs_inline[8];
+ struct buffer_head **bhs = bhs_inline;
+ int i, ret;
- while (copied < size) {
- csize = (size - copied) > blocksize ? blocksize : size - copied;
- bh = ext4_bread(NULL, ea_inode, block, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
- if (!bh)
- return -EFSCORRUPTED;
+ if (bh_count > ARRAY_SIZE(bhs_inline)) {
+ bhs = kmalloc_array(bh_count, sizeof(*bhs), GFP_NOFS);
+ if (!bhs)
+ return -ENOMEM;
+ }
- memcpy(copy_pos, bh->b_data, csize);
- brelse(bh);
+ ret = ext4_bread_batch(ea_inode, 0 /* block */, bh_count,
+ true /* wait */, bhs);
+ if (ret)
+ goto free_bhs;
- copy_pos += csize;
- block += 1;
- copied += csize;
+ for (i = 0; i < bh_count; i++) {
+ /* There shouldn't be any holes in ea_inode. */
+ if (!bhs[i]) {
+ ret = -EFSCORRUPTED;
+ goto put_bhs;
+ }
+ memcpy((char *)buf + blocksize * i, bhs[i]->b_data,
+ i < bh_count - 1 ? blocksize : tail_size);
}
- return 0;
+ ret = 0;
+put_bhs:
+ for (i = 0; i < bh_count; i++)
+ brelse(bhs[i]);
+free_bhs:
+ if (bhs != bhs_inline)
+ kfree(bhs);
+ return ret;
}
+#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
+
static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
- struct inode **ea_inode)
+ u32 ea_inode_hash, struct inode **ea_inode)
{
struct inode *inode;
int err;
@@ -372,6 +388,24 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
goto error;
}
+ ext4_xattr_inode_set_class(inode);
+
+ /*
+ * Check whether this is an old Lustre-style xattr inode. Lustre
+ * implementation does not have hash validation, rather it has a
+ * backpointer from ea_inode to the parent inode.
+ */
+ if (ea_inode_hash != ext4_xattr_inode_get_hash(inode) &&
+ EXT4_XATTR_INODE_GET_PARENT(inode) == parent->i_ino &&
+ inode->i_generation == parent->i_generation) {
+ ext4_set_inode_state(inode, EXT4_STATE_LUSTRE_EA_INODE);
+ ext4_xattr_inode_set_ref(inode, 1);
+ } else {
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ }
+
*ea_inode = inode;
return 0;
error:
@@ -404,8 +438,6 @@ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
return 0;
}
-#define EXT4_XATTR_INODE_GET_PARENT(inode) ((__u32)(inode)->i_mtime.tv_sec)
-
/*
* Read xattr value from the EA inode.
*/
@@ -418,7 +450,7 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
int err;
err = ext4_xattr_inode_iget(inode, le32_to_cpu(entry->e_value_inum),
- &ea_inode);
+ le32_to_cpu(entry->e_hash), &ea_inode);
if (err) {
ea_inode = NULL;
goto out;
@@ -436,28 +468,20 @@ ext4_xattr_inode_get(struct inode *inode, struct ext4_xattr_entry *entry,
if (err)
goto out;
- err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer, size);
- /*
- * Compatibility check for old Lustre ea_inode implementation. Old
- * version does not have hash validation, but it has a backpointer
- * from ea_inode to the parent inode.
- */
- if (err == -EFSCORRUPTED) {
- if (EXT4_XATTR_INODE_GET_PARENT(ea_inode) != inode->i_ino ||
- ea_inode->i_generation != inode->i_generation) {
+ if (!ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE)) {
+ err = ext4_xattr_inode_verify_hashes(ea_inode, entry, buffer,
+ size);
+ if (err) {
ext4_warning_inode(ea_inode,
"EA inode hash validation failed");
goto out;
}
- /* Do not add ea_inode to the cache. */
- ea_inode_cache = NULL;
- } else if (err)
- goto out;
- if (ea_inode_cache)
- mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
- ext4_xattr_inode_get_hash(ea_inode),
- ea_inode->i_ino, true /* reusable */);
+ if (ea_inode_cache)
+ mb_cache_entry_create(ea_inode_cache, GFP_NOFS,
+ ext4_xattr_inode_get_hash(ea_inode),
+ ea_inode->i_ino, true /* reusable */);
+ }
out:
iput(ea_inode);
return err;
@@ -824,10 +848,15 @@ static int ext4_xattr_inode_alloc_quota(struct inode *inode, size_t len)
return err;
}
-static void ext4_xattr_inode_free_quota(struct inode *inode, size_t len)
+static void ext4_xattr_inode_free_quota(struct inode *parent,
+ struct inode *ea_inode,
+ size_t len)
{
- dquot_free_space_nodirty(inode, round_up_cluster(inode, len));
- dquot_free_inode(inode);
+ if (ea_inode &&
+ ext4_test_inode_state(ea_inode, EXT4_STATE_LUSTRE_EA_INODE))
+ return;
+ dquot_free_space_nodirty(parent, round_up_cluster(parent, len));
+ dquot_free_inode(parent);
}
int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
@@ -1057,7 +1086,9 @@ static int ext4_xattr_inode_inc_ref_all(handle_t *handle, struct inode *parent,
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
- err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ err = ext4_xattr_inode_iget(parent, ea_ino,
+ le32_to_cpu(entry->e_hash),
+ &ea_inode);
if (err)
goto cleanup;
err = ext4_xattr_inode_inc_ref(handle, ea_inode);
@@ -1079,7 +1110,9 @@ cleanup:
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
- err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ err = ext4_xattr_inode_iget(parent, ea_ino,
+ le32_to_cpu(entry->e_hash),
+ &ea_inode);
if (err) {
ext4_warning(parent->i_sb,
"cleanup ea_ino %u iget error %d", ea_ino,
@@ -1117,7 +1150,9 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
- err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
+ err = ext4_xattr_inode_iget(parent, ea_ino,
+ le32_to_cpu(entry->e_hash),
+ &ea_inode);
if (err)
continue;
@@ -1145,7 +1180,7 @@ ext4_xattr_inode_dec_ref_all(handle_t *handle, struct inode *parent,
}
if (!skip_quota)
- ext4_xattr_inode_free_quota(parent,
+ ext4_xattr_inode_free_quota(parent, ea_inode,
le32_to_cpu(entry->e_value_size));
/*
@@ -1529,7 +1564,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
/* Clear padding bytes. */
memset(val + i->value_len, 0, new_size - i->value_len);
}
- return 0;
+ goto update_hash;
}
/* Compute min_offs and last. */
@@ -1577,6 +1612,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
if (!s->not_found && here->e_value_inum) {
ret = ext4_xattr_inode_iget(inode,
le32_to_cpu(here->e_value_inum),
+ le32_to_cpu(here->e_hash),
&old_ea_inode);
if (ret) {
old_ea_inode = NULL;
@@ -1595,7 +1631,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
&new_ea_inode);
if (ret) {
new_ea_inode = NULL;
- ext4_xattr_inode_free_quota(inode, i->value_len);
+ ext4_xattr_inode_free_quota(inode, NULL, i->value_len);
goto out;
}
}
@@ -1614,13 +1650,13 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
ext4_warning_inode(new_ea_inode,
"dec ref new_ea_inode err=%d",
err);
- ext4_xattr_inode_free_quota(inode,
+ ext4_xattr_inode_free_quota(inode, new_ea_inode,
i->value_len);
}
goto out;
}
- ext4_xattr_inode_free_quota(inode,
+ ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
}
@@ -1693,6 +1729,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
here->e_value_size = cpu_to_le32(i->value_len);
}
+update_hash:
if (i->value) {
__le32 hash = 0;
@@ -1711,7 +1748,8 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
here->e_name_len,
&crc32c_hash, 1);
} else if (is_block) {
- __le32 *value = s->base + min_offs - new_size;
+ __le32 *value = s->base + le16_to_cpu(
+ here->e_value_offs);
hash = ext4_xattr_hash_entry(here->e_name,
here->e_name_len, value,
@@ -1789,8 +1827,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
struct mb_cache_entry *ce = NULL;
int error = 0;
struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
- struct inode *ea_inode = NULL;
- size_t old_ea_inode_size = 0;
+ struct inode *ea_inode = NULL, *tmp_inode;
+ size_t old_ea_inode_quota = 0;
+ unsigned int ea_ino;
+
#define header(x) ((struct ext4_xattr_header *)(x))
@@ -1815,9 +1855,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
- if (!error)
- ext4_xattr_block_cache_insert(ea_block_cache,
- bs->bh);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -1852,12 +1889,24 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* like it has an empty value.
*/
if (!s->not_found && s->here->e_value_inum) {
- /*
- * Defer quota free call for previous inode
- * until success is guaranteed.
- */
- old_ea_inode_size = le32_to_cpu(
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino,
+ le32_to_cpu(s->here->e_hash),
+ &tmp_inode);
+ if (error)
+ goto cleanup;
+
+ if (!ext4_test_inode_state(tmp_inode,
+ EXT4_STATE_LUSTRE_EA_INODE)) {
+ /*
+ * Defer quota free call for previous
+ * inode until success is guaranteed.
+ */
+ old_ea_inode_quota = le32_to_cpu(
s->here->e_value_size);
+ }
+ iput(tmp_inode);
+
s->here->e_value_inum = 0;
s->here->e_value_size = 0;
}
@@ -1884,8 +1933,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
goto cleanup;
if (i->value && s->here->e_value_inum) {
- unsigned int ea_ino;
-
/*
* A ref count on ea_inode has been taken as part of the call to
* ext4_xattr_set_entry() above. We would like to drop this
@@ -1893,7 +1940,9 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* initialized and has its own ref count on the ea_inode.
*/
ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
+ error = ext4_xattr_inode_iget(inode, ea_ino,
+ le32_to_cpu(s->here->e_hash),
+ &ea_inode);
if (error) {
ea_inode = NULL;
goto cleanup;
@@ -1973,6 +2022,7 @@ inserted:
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
ea_bdebug(bs->bh, "keeping this block");
+ ext4_xattr_block_cache_insert(ea_block_cache, bs->bh);
new_bh = bs->bh;
get_bh(new_bh);
} else {
@@ -2042,8 +2092,8 @@ getblk_failed:
}
}
- if (old_ea_inode_size)
- ext4_xattr_inode_free_quota(inode, old_ea_inode_size);
+ if (old_ea_inode_quota)
+ ext4_xattr_inode_free_quota(inode, NULL, old_ea_inode_quota);
/* Update the inode. */
EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
@@ -2070,7 +2120,7 @@ cleanup:
/* If there was an error, revert the quota charge. */
if (error)
- ext4_xattr_inode_free_quota(inode,
+ ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
iput(ea_inode);
}
@@ -2625,23 +2675,21 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle)
{
struct ext4_xattr_ibody_header *header;
- struct buffer_head *bh = NULL;
+ struct buffer_head *bh;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ static unsigned int mnt_count;
size_t min_offs;
size_t ifree, bfree;
int total_ino;
void *base, *end;
int error = 0, tried_min_extra_isize = 0;
- int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+ int s_min_extra_isize = le16_to_cpu(sbi->s_es->s_min_extra_isize);
int isize_diff; /* How much do we need to grow i_extra_isize */
- int no_expand;
-
- if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
- return 0;
retry:
isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
- goto out;
+ return 0;
header = IHDR(inode, raw_inode);
@@ -2676,6 +2724,7 @@ retry:
EXT4_ERROR_INODE(inode, "bad block %llu",
EXT4_I(inode)->i_file_acl);
error = -EFSCORRUPTED;
+ brelse(bh);
goto cleanup;
}
base = BHDR(bh);
@@ -2683,11 +2732,11 @@ retry:
min_offs = end - base;
bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
NULL);
+ brelse(bh);
if (bfree + ifree < isize_diff) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
- brelse(bh);
goto retry;
}
error = -ENOSPC;
@@ -2705,7 +2754,6 @@ retry:
s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
- brelse(bh);
goto retry;
}
goto cleanup;
@@ -2717,18 +2765,13 @@ shift:
EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
- brelse(bh);
-out:
- ext4_write_unlock_xattr(inode, &no_expand);
- return 0;
cleanup:
- brelse(bh);
- /*
- * Inode size expansion failed; don't try again
- */
- no_expand = 1;
- ext4_write_unlock_xattr(inode, &no_expand);
+ if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
+ ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
+ inode->i_ino);
+ mnt_count = le16_to_cpu(sbi->s_es->s_mnt_count);
+ }
return error;
}
@@ -2793,6 +2836,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_ibody_header *header;
struct ext4_iloc iloc = { .bh = NULL };
struct ext4_xattr_entry *entry;
+ struct inode *ea_inode;
int error;
error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
@@ -2847,10 +2891,19 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
if (ext4_has_feature_ea_inode(inode->i_sb)) {
for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry);
- entry = EXT4_XATTR_NEXT(entry))
- if (entry->e_value_inum)
- ext4_xattr_inode_free_quota(inode,
+ entry = EXT4_XATTR_NEXT(entry)) {
+ if (!entry->e_value_inum)
+ continue;
+ error = ext4_xattr_inode_iget(inode,
+ le32_to_cpu(entry->e_value_inum),
+ le32_to_cpu(entry->e_hash),
+ &ea_inode);
+ if (error)
+ continue;
+ ext4_xattr_inode_free_quota(inode, ea_inode,
le32_to_cpu(entry->e_value_size));
+ iput(ea_inode);
+ }
}
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 0d2dde1fa87a..f8cc07588ac9 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
File: fs/ext4/xattr.h
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index a8921112030d..629001b28632 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/xattr_security.c
* Handler for storing security labels as extended attributes.
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index c7765c735714..e9389e5d75c3 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/xattr_trusted.c
* Handler for trusted extended attributes.
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index ca20e423034b..d4546184b34b 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/xattr_user.c
* Handler for extended user attributes.