summaryrefslogtreecommitdiff
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c438
1 files changed, 252 insertions, 186 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3c600f02673f..0992d76f7ab1 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext4/inode.c
*
@@ -892,7 +893,7 @@ static int ext4_dio_get_block_unwritten_async(struct inode *inode,
/*
* Get block function for non-AIO DIO writes when we create unwritten extent if
* blocks are not allocated yet. The extent will be converted to written
- * after IO is complete from ext4_ext_direct_IO() function.
+ * after IO is complete by ext4_direct_IO_write().
*/
static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
sector_t iblock, struct buffer_head *bh_result, int create)
@@ -907,7 +908,7 @@ static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
/*
* Mark inode as having pending DIO writes to unwritten extents.
- * ext4_ext_direct_IO() checks this flag and converts extents to
+ * ext4_direct_IO_write() checks this flag and converts extents to
* written.
*/
if (!ret && buffer_unwritten(bh_result))
@@ -1015,6 +1016,50 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
return ERR_PTR(-EIO);
}
+/* Read a contiguous batch of blocks. */
+int ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
+ bool wait, struct buffer_head **bhs)
+{
+ int i, err;
+
+ for (i = 0; i < bh_count; i++) {
+ bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
+ if (IS_ERR(bhs[i])) {
+ err = PTR_ERR(bhs[i]);
+ bh_count = i;
+ goto out_brelse;
+ }
+ }
+
+ for (i = 0; i < bh_count; i++)
+ /* Note that NULL bhs[i] is valid because of holes. */
+ if (bhs[i] && !buffer_uptodate(bhs[i]))
+ ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1,
+ &bhs[i]);
+
+ if (!wait)
+ return 0;
+
+ for (i = 0; i < bh_count; i++)
+ if (bhs[i])
+ wait_on_buffer(bhs[i]);
+
+ for (i = 0; i < bh_count; i++) {
+ if (bhs[i] && !buffer_uptodate(bhs[i])) {
+ err = -EIO;
+ goto out_brelse;
+ }
+ }
+ return 0;
+
+out_brelse:
+ for (i = 0; i < bh_count; i++) {
+ brelse(bhs[i]);
+ bhs[i] = NULL;
+ }
+ return err;
+}
+
int ext4_walk_page_buffers(handle_t *handle,
struct buffer_head *head,
unsigned from,
@@ -1674,15 +1719,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
ext4_es_remove_extent(inode, start, last - start + 1);
}
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
while (index <= end) {
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
+
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
@@ -1693,7 +1737,6 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
}
unlock_page(page);
}
- index = pvec.pages[nr_pages - 1]->index + 1;
pagevec_release(&pvec);
}
}
@@ -2302,19 +2345,15 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
lblk = start << bpp_bits;
pblock = mpd->map.m_pblk;
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
while (start <= end) {
- nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start,
- PAGEVEC_SIZE);
+ nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping,
+ &start, end);
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (page->index > end)
- break;
- /* Up to 'end' pages must be contiguous */
- BUG_ON(page->index != start);
bh = head = page_buffers(page);
do {
if (lblk < mpd->map.m_lblk)
@@ -2359,7 +2398,6 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
pagevec_release(&pvec);
return err;
}
- start++;
}
pagevec_release(&pvec);
}
@@ -2578,12 +2616,12 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
else
tag = PAGECACHE_TAG_DIRTY;
- pagevec_init(&pvec, 0);
+ pagevec_init(&pvec);
mpd->map.m_len = 0;
mpd->next_page = index;
while (index <= end) {
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
- min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
+ nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
+ tag);
if (nr_pages == 0)
goto out;
@@ -2591,16 +2629,6 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
struct page *page = pvec.pages[i];
/*
- * At this point, the page may be truncated or
- * invalidated (changing page->mapping to NULL), or
- * even swizzled back from swapper_space to tmpfs file
- * mapping. However, page->index will not change
- * because we have a reference on the page.
- */
- if (page->index > end)
- goto out;
-
- /*
* Accumulated enough dirty pages? This doesn't apply
* to WB_SYNC_ALL mode. For integrity sync we have to
* keep going because someone may be concurrently
@@ -3356,26 +3384,75 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-#ifdef CONFIG_FS_DAX
+static bool ext4_inode_datasync_dirty(struct inode *inode)
+{
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal)
+ return !jbd2_transaction_committed(journal,
+ EXT4_I(inode)->i_datasync_tid);
+ /* Any metadata buffers to write? */
+ if (!list_empty(&inode->i_mapping->private_list))
+ return true;
+ return inode->i_state & I_DIRTY_DATASYNC;
+}
+
static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
unsigned flags, struct iomap *iomap)
{
- struct block_device *bdev;
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
unsigned int blkbits = inode->i_blkbits;
unsigned long first_block = offset >> blkbits;
unsigned long last_block = (offset + length - 1) >> blkbits;
struct ext4_map_blocks map;
+ bool delalloc = false;
int ret;
- if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
- return -ERANGE;
+
+ if (flags & IOMAP_REPORT) {
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_inline_data_iomap(inode, iomap);
+ if (ret != -EAGAIN) {
+ if (ret == 0 && offset >= iomap->length)
+ ret = -ENOENT;
+ return ret;
+ }
+ }
+ } else {
+ if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
+ return -ERANGE;
+ }
map.m_lblk = first_block;
map.m_len = last_block - first_block + 1;
- if (!(flags & IOMAP_WRITE)) {
+ if (flags & IOMAP_REPORT) {
ret = ext4_map_blocks(NULL, inode, &map, 0);
- } else {
+ if (ret < 0)
+ return ret;
+
+ if (ret == 0) {
+ ext4_lblk_t end = map.m_lblk + map.m_len - 1;
+ struct extent_status es;
+
+ ext4_es_find_delayed_extent_range(inode, map.m_lblk, end, &es);
+
+ if (!es.es_len || es.es_lblk > end) {
+ /* entire range is a hole */
+ } else if (es.es_lblk > map.m_lblk) {
+ /* range starts with a hole */
+ map.m_len = es.es_lblk - map.m_lblk;
+ } else {
+ ext4_lblk_t offs = 0;
+
+ if (es.es_lblk < map.m_lblk)
+ offs = map.m_lblk - es.es_lblk;
+ map.m_lblk = es.es_lblk + offs;
+ map.m_len = es.es_len - offs;
+ delalloc = true;
+ }
+ }
+ } else if (flags & IOMAP_WRITE) {
int dio_credits;
handle_t *handle;
int retries = 0;
@@ -3426,21 +3503,23 @@ retry:
}
}
ext4_journal_stop(handle);
+ } else {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret < 0)
+ return ret;
}
iomap->flags = 0;
- bdev = inode->i_sb->s_bdev;
- iomap->bdev = bdev;
- if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
- else
- iomap->dax_dev = NULL;
+ if (ext4_inode_datasync_dirty(inode))
+ iomap->flags |= IOMAP_F_DIRTY;
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->dax_dev = sbi->s_daxdev;
iomap->offset = first_block << blkbits;
+ iomap->length = (u64)map.m_len << blkbits;
if (ret == 0) {
- iomap->type = IOMAP_HOLE;
- iomap->blkno = IOMAP_NULL_BLOCK;
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->type = delalloc ? IOMAP_DELALLOC : IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
} else {
if (map.m_flags & EXT4_MAP_MAPPED) {
iomap->type = IOMAP_MAPPED;
@@ -3450,12 +3529,12 @@ retry:
WARN_ON_ONCE(1);
return -EIO;
}
- iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9);
- iomap->length = (u64)map.m_len << blkbits;
+ iomap->addr = (u64)map.m_pblk << blkbits;
}
if (map.m_flags & EXT4_MAP_NEW)
iomap->flags |= IOMAP_F_NEW;
+
return 0;
}
@@ -3467,7 +3546,6 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
int blkbits = inode->i_blkbits;
bool truncate = false;
- fs_put_dax(iomap->dax_dev);
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0;
@@ -3517,8 +3595,6 @@ const struct iomap_ops ext4_iomap_ops = {
.iomap_end = ext4_iomap_end,
};
-#endif
-
static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
@@ -4540,6 +4616,21 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
!ext4_test_inode_state(inode, EXT4_STATE_XATTR));
}
+static bool ext4_should_use_dax(struct inode *inode)
+{
+ if (!test_opt(inode->i_sb, DAX))
+ return false;
+ if (!S_ISREG(inode->i_mode))
+ return false;
+ if (ext4_should_journal_data(inode))
+ return false;
+ if (ext4_has_inline_data(inode))
+ return false;
+ if (ext4_encrypted_inode(inode))
+ return false;
+ return true;
+}
+
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
@@ -4555,12 +4646,13 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) &&
- !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) &&
- !ext4_encrypted_inode(inode))
+ if (ext4_should_use_dax(inode))
new_fl |= S_DAX;
+ if (flags & EXT4_ENCRYPT_FL)
+ new_fl |= S_ENCRYPTED;
inode_set_flags(inode, new_fl,
- S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
+ S_ENCRYPTED);
}
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -4853,14 +4945,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
brelse(iloc.bh);
ext4_set_inode_flags(inode);
- if (ei->i_flags & EXT4_EA_INODE_FL) {
- ext4_xattr_inode_set_class(inode);
-
- inode_lock(inode);
- inode->i_flags |= S_NOQUOTA;
- inode_unlock(inode);
- }
-
unlock_new_inode(inode);
return inode;
@@ -5284,6 +5368,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (error)
return error;
+ error = fscrypt_prepare_setattr(dentry, attr);
+ if (error)
+ return error;
+
if (is_quota_modification(inode, attr)) {
error = dquot_initialize(inode);
if (error)
@@ -5329,14 +5417,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
int shrink = (attr->ia_size <= inode->i_size);
- if (ext4_encrypted_inode(inode)) {
- error = fscrypt_get_encryption_info(inode);
- if (error)
- return error;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
-
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -5658,22 +5738,16 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
return err;
}
-/*
- * Expand an inode by new_extra_isize bytes.
- * Returns 0 on success or negative error number on failure.
- */
-static int ext4_expand_extra_isize(struct inode *inode,
- unsigned int new_extra_isize,
- struct ext4_iloc iloc,
- handle_t *handle)
+static int __ext4_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc *iloc,
+ handle_t *handle, int *no_expand)
{
struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header;
+ int error;
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
- return 0;
-
- raw_inode = ext4_raw_inode(&iloc);
+ raw_inode = ext4_raw_inode(iloc);
header = IHDR(inode, raw_inode);
@@ -5688,8 +5762,98 @@ static int ext4_expand_extra_isize(struct inode *inode,
}
/* try to expand with EAs present */
- return ext4_expand_extra_isize_ea(inode, new_extra_isize,
- raw_inode, handle);
+ error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
+ raw_inode, handle);
+ if (error) {
+ /*
+ * Inode size expansion failed; don't try again
+ */
+ *no_expand = 1;
+ }
+
+ return error;
+}
+
+/*
+ * Expand an inode by new_extra_isize bytes.
+ * Returns 0 on success or negative error number on failure.
+ */
+static int ext4_try_to_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc iloc,
+ handle_t *handle)
+{
+ int no_expand;
+ int error;
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND))
+ return -EOVERFLOW;
+
+ /*
+ * In nojournal mode, we can immediately attempt to expand
+ * the inode. When journaled, we first need to obtain extra
+ * buffer credits since we may write into the EA block
+ * with this same handle. If journal_extend fails, then it will
+ * only result in a minor loss of functionality for that inode.
+ * If this is felt to be critical, then e2fsck should be run to
+ * force a large enough s_min_extra_isize.
+ */
+ if (ext4_handle_valid(handle) &&
+ jbd2_journal_extend(handle,
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) != 0)
+ return -ENOSPC;
+
+ if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
+ return -EBUSY;
+
+ error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
+ handle, &no_expand);
+ ext4_write_unlock_xattr(inode, &no_expand);
+
+ return error;
+}
+
+int ext4_expand_extra_isize(struct inode *inode,
+ unsigned int new_extra_isize,
+ struct ext4_iloc *iloc)
+{
+ handle_t *handle;
+ int no_expand;
+ int error, rc;
+
+ if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
+ brelse(iloc->bh);
+ return -EOVERFLOW;
+ }
+
+ handle = ext4_journal_start(inode, EXT4_HT_INODE,
+ EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ brelse(iloc->bh);
+ return error;
+ }
+
+ ext4_write_lock_xattr(inode, &no_expand);
+
+ BUFFER_TRACE(iloc.bh, "get_write_access");
+ error = ext4_journal_get_write_access(handle, iloc->bh);
+ if (error) {
+ brelse(iloc->bh);
+ goto out_stop;
+ }
+
+ error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
+ handle, &no_expand);
+
+ rc = ext4_mark_iloc_dirty(handle, inode, iloc);
+ if (!error)
+ error = rc;
+
+ ext4_write_unlock_xattr(inode, &no_expand);
+out_stop:
+ ext4_journal_stop(handle);
+ return error;
}
/*
@@ -5709,44 +5873,18 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
struct ext4_iloc iloc;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- static unsigned int mnt_count;
- int err, ret;
+ int err;
might_sleep();
trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err)
return err;
- if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
- !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
- /*
- * In nojournal mode, we can immediately attempt to expand
- * the inode. When journaled, we first need to obtain extra
- * buffer credits since we may write into the EA block
- * with this same handle. If journal_extend fails, then it will
- * only result in a minor loss of functionality for that inode.
- * If this is felt to be critical, then e2fsck should be run to
- * force a large enough s_min_extra_isize.
- */
- if (!ext4_handle_valid(handle) ||
- jbd2_journal_extend(handle,
- EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {
- ret = ext4_expand_extra_isize(inode,
- sbi->s_want_extra_isize,
- iloc, handle);
- if (ret) {
- if (mnt_count !=
- le16_to_cpu(sbi->s_es->s_mnt_count)) {
- ext4_warning(inode->i_sb,
- "Unable to expand inode %lu. Delete"
- " some EAs or run e2fsck.",
- inode->i_ino);
- mnt_count =
- le16_to_cpu(sbi->s_es->s_mnt_count);
- }
- }
- }
- }
+
+ if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize)
+ ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize,
+ iloc, handle);
+
return ext4_mark_iloc_dirty(handle, inode, &iloc);
}
@@ -5884,11 +6022,6 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
}
ext4_set_aops(inode);
- /*
- * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated.
- * E.g. S_DAX may get cleared / set.
- */
- ext4_set_inode_flags(inode);
jbd2_journal_unlock_updates(journal);
percpu_up_write(&sbi->s_journal_flag_rwsem);
@@ -6024,70 +6157,3 @@ int ext4_filemap_fault(struct vm_fault *vmf)
return err;
}
-
-/*
- * Find the first extent at or after @lblk in an inode that is not a hole.
- * Search for @map_len blocks at most. The extent is returned in @result.
- *
- * The function returns 1 if we found an extent. The function returns 0 in
- * case there is no extent at or after @lblk and in that case also sets
- * @result->es_len to 0. In case of error, the error code is returned.
- */
-int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
- unsigned int map_len, struct extent_status *result)
-{
- struct ext4_map_blocks map;
- struct extent_status es = {};
- int ret;
-
- map.m_lblk = lblk;
- map.m_len = map_len;
-
- /*
- * For non-extent based files this loop may iterate several times since
- * we do not determine full hole size.
- */
- while (map.m_len > 0) {
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret < 0)
- return ret;
- /* There's extent covering m_lblk? Just return it. */
- if (ret > 0) {
- int status;
-
- ext4_es_store_pblock(result, map.m_pblk);
- result->es_lblk = map.m_lblk;
- result->es_len = map.m_len;
- if (map.m_flags & EXT4_MAP_UNWRITTEN)
- status = EXTENT_STATUS_UNWRITTEN;
- else
- status = EXTENT_STATUS_WRITTEN;
- ext4_es_store_status(result, status);
- return 1;
- }
- ext4_es_find_delayed_extent_range(inode, map.m_lblk,
- map.m_lblk + map.m_len - 1,
- &es);
- /* Is delalloc data before next block in extent tree? */
- if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
- ext4_lblk_t offset = 0;
-
- if (es.es_lblk < lblk)
- offset = lblk - es.es_lblk;
- result->es_lblk = es.es_lblk + offset;
- ext4_es_store_pblock(result,
- ext4_es_pblock(&es) + offset);
- result->es_len = es.es_len - offset;
- ext4_es_store_status(result, ext4_es_status(&es));
-
- return 1;
- }
- /* There's a hole at m_lblk, advance us after it */
- map.m_lblk += map.m_len;
- map_len -= map.m_len;
- map.m_len = map_len;
- cond_resched();
- }
- result->es_len = 0;
- return 0;
-}