diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 508 |
1 files changed, 359 insertions, 149 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c064727ed62..b9ffa9f4191f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,6 +37,7 @@ #include <linux/printk.h> #include <linux/slab.h> #include <linux/bitops.h> +#include <linux/iomap.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); offset += csum_size; - csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, - EXT4_INODE_SIZE(inode->i_sb) - - offset); } + csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, + EXT4_INODE_SIZE(inode->i_sb) - offset); } return csum; @@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode) "couldn't mark inode dirty (err %d)", err); goto stop_handle; } - if (inode->i_blocks) - ext4_truncate(inode); + if (inode->i_blocks) { + err = ext4_truncate(inode); + if (err) { + ext4_error(inode->i_sb, + "couldn't truncate inode %lu (err %d)", + inode->i_ino, err); + goto stop_handle; + } + } /* * ext4_ext_truncate() doesn't reserve any slop when it @@ -654,12 +661,8 @@ found: if (flags & EXT4_GET_BLOCKS_ZERO && map->m_flags & EXT4_MAP_MAPPED && map->m_flags & EXT4_MAP_NEW) { - ext4_lblk_t i; - - for (i = 0; i < map->m_len; i++) { - unmap_underlying_metadata(inode->i_sb->s_bdev, - map->m_pblk + i); - } + clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, + map->m_len); ret = ext4_issue_zeroout(inode, map->m_lblk, map->m_pblk, map->m_len); if (ret) { @@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, ext4_update_bh_state(bh, map.m_flags); bh->b_size = inode->i_sb->s_blocksize * map.m_len; ret = 0; + } else if (ret == 0) { + /* hole case, need to fill in bh->b_size */ + bh->b_size = inode->i_sb->s_blocksize * map.m_len; } return ret; } @@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (err) break; if (buffer_new(bh)) { - unmap_underlying_metadata(bh->b_bdev, - bh->b_blocknr); + clean_bdev_bh_alias(bh); if (PageUptodate(page)) { clear_buffer_new(bh); set_buffer_uptodate(bh); @@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, if (unlikely(err)) page_zero_new_buffers(page, from, to); else if (decrypt) - err = fscrypt_decrypt_page(page); + err = fscrypt_decrypt_page(page->mapping->host, page, + PAGE_SIZE, 0, page->index); return err; } #endif @@ -1183,6 +1189,9 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, pgoff_t index; unsigned from, to; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + trace_ext4_write_begin(inode, pos, len, flags); /* * Reserve one block more for addition to orphan list in case @@ -1324,8 +1333,11 @@ static int ext4_write_end(struct file *file, if (ext4_has_inline_data(inode)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); - if (ret < 0) + if (ret < 0) { + unlock_page(page); + put_page(page); goto errout; + } copied = ret; } else copied = block_write_end(file, mapping, pos, @@ -1379,7 +1391,9 @@ errout: * set the buffer to be dirty, since in data=journalled mode we need * to call ext4_handle_dirty_metadata() instead. */ -static void zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ext4_journalled_zero_new_buffers(handle_t *handle, + struct page *page, + unsigned from, unsigned to) { unsigned int block_start = 0, block_end; struct buffer_head *head, *bh; @@ -1396,7 +1410,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to) size = min(to, block_end) - start; zero_user(page, start, size); - set_buffer_uptodate(bh); + write_end_fn(handle, bh); } clear_buffer_new(bh); } @@ -1425,18 +1439,25 @@ static int ext4_journalled_write_end(struct file *file, BUG_ON(!ext4_handle_valid(handle)); - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else { - if (copied < len) { - if (!PageUptodate(page)) - copied = 0; - zero_new_buffers(page, from+copied, to); + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) { + unlock_page(page); + put_page(page); + goto errout; } - + copied = ret; + } else if (unlikely(copied < len) && !PageUptodate(page)) { + copied = 0; + ext4_journalled_zero_new_buffers(handle, page, from, to); + } else { + if (unlikely(copied < len)) + ext4_journalled_zero_new_buffers(handle, page, + from + copied, to); ret = ext4_walk_page_buffers(handle, page_buffers(page), from, - to, &partial, write_end_fn); + from + copied, &partial, + write_end_fn); if (!partial) SetPageUptodate(page); } @@ -1462,6 +1483,7 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); +errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -2028,6 +2050,12 @@ static int ext4_writepage(struct page *page, struct ext4_io_submit io_submit; bool keep_towrite = false; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { + ext4_invalidatepage(page, 0, PAGE_SIZE); + unlock_page(page); + return -EIO; + } + trace_ext4_writepage(page); size = i_size_read(inode); if (page->index == size >> PAGE_SHIFT) @@ -2193,7 +2221,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, { struct inode *inode = mpd->inode; int err; - ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) + ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) >> inode->i_blkbits; do { @@ -2360,11 +2388,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) BUG_ON(map->m_len == 0); if (map->m_flags & EXT4_MAP_NEW) { - struct block_device *bdev = inode->i_sb->s_bdev; - int i; - - for (i = 0; i < map->m_len; i++) - unmap_underlying_metadata(bdev, map->m_pblk + i); + clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, + map->m_len); } return 0; } @@ -2406,7 +2431,8 @@ static int mpage_map_and_submit_extent(handle_t *handle, if (err < 0) { struct super_block *sb = inode->i_sb; - if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) + if (ext4_forced_shutdown(EXT4_SB(sb)) || + EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) goto invalidate_dirty_pages; /* * Let the uper layers retry transient errors. @@ -2461,8 +2487,8 @@ update_disksize: disksize = i_size; if (disksize > EXT4_I(inode)->i_disksize) EXT4_I(inode)->i_disksize = disksize; - err2 = ext4_mark_inode_dirty(handle, inode); up_write(&EXT4_I(inode)->i_data_sem); + err2 = ext4_mark_inode_dirty(handle, inode); if (err2) ext4_error(inode->i_sb, "Failed to mark inode %lu dirty", @@ -2628,6 +2654,9 @@ static int ext4_writepages(struct address_space *mapping, struct blk_plug plug; bool give_up_on_write = false; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); @@ -2664,7 +2693,8 @@ static int ext4_writepages(struct address_space *mapping, * *never* be called, so if that ever happens, we would want * the stack trace. */ - if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { + if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || + sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) { ret = -EROFS; goto out_writepages; } @@ -2889,9 +2919,13 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, struct inode *inode = mapping->host; handle_t *handle; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + index = pos >> PAGE_SHIFT; - if (ext4_nonda_switch(inode->i_sb)) { + if (ext4_nonda_switch(inode->i_sb) || + S_ISLNK(inode->i_mode)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; return ext4_write_begin(file, mapping, pos, len, flags, pagep, fsdata); @@ -3268,53 +3302,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait) } #ifdef CONFIG_FS_DAX -/* - * Get block function for DAX IO and mmap faults. It takes care of converting - * unwritten extents to written ones and initializes new / converted blocks - * to zeros. - */ -int ext4_dax_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned flags, struct iomap *iomap) { + unsigned int blkbits = inode->i_blkbits; + unsigned long first_block = offset >> blkbits; + unsigned long last_block = (offset + length - 1) >> blkbits; + struct ext4_map_blocks map; int ret; - ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); - if (!create) - return _ext4_get_block(inode, iblock, bh_result, 0); + if (WARN_ON_ONCE(ext4_has_inline_data(inode))) + return -ERANGE; - ret = ext4_get_block_trans(inode, iblock, bh_result, - EXT4_GET_BLOCKS_PRE_IO | - EXT4_GET_BLOCKS_CREATE_ZERO); - if (ret < 0) - return ret; + map.m_lblk = first_block; + map.m_len = last_block - first_block + 1; + + if (!(flags & IOMAP_WRITE)) { + ret = ext4_map_blocks(NULL, inode, &map, 0); + } else { + int dio_credits; + handle_t *handle; + int retries = 0; - if (buffer_unwritten(bh_result)) { + /* Trim mapping request to maximum we can map at once for DIO */ + if (map.m_len > DIO_MAX_BLOCKS) + map.m_len = DIO_MAX_BLOCKS; + dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); +retry: /* - * We are protected by i_mmap_sem or i_mutex so we know block - * cannot go away from under us even though we dropped - * i_data_sem. Convert extent to written and write zeros there. + * Either we allocate blocks and then we don't get unwritten + * extent so we have reserved enough credits, or the blocks + * are already allocated and unwritten and in that case + * extent conversion fits in the credits as well. */ - ret = ext4_get_block_trans(inode, iblock, bh_result, - EXT4_GET_BLOCKS_CONVERT | - EXT4_GET_BLOCKS_CREATE_ZERO); - if (ret < 0) + handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, + dio_credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ret = ext4_map_blocks(handle, inode, &map, + EXT4_GET_BLOCKS_CREATE_ZERO); + if (ret < 0) { + ext4_journal_stop(handle); + if (ret == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; return ret; + } + + /* + * If we added blocks beyond i_size, we need to make sure they + * will get truncated if we crash before updating i_size in + * ext4_iomap_end(). For faults we don't need to do that (and + * even cannot because for orphan list operations inode_lock is + * required) - if we happen to instantiate block beyond i_size, + * it is because we race with truncate which has already added + * the inode to the orphan list. + */ + if (!(flags & IOMAP_FAULT) && first_block + map.m_len > + (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) { + int err; + + err = ext4_orphan_add(handle, inode); + if (err < 0) { + ext4_journal_stop(handle); + return err; + } + } + ext4_journal_stop(handle); } - /* - * At least for now we have to clear BH_New so that DAX code - * doesn't attempt to zero blocks again in a racy way. - */ - clear_buffer_new(bh_result); + + iomap->flags = 0; + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = first_block << blkbits; + + if (ret == 0) { + iomap->type = IOMAP_HOLE; + iomap->blkno = IOMAP_NULL_BLOCK; + iomap->length = (u64)map.m_len << blkbits; + } else { + if (map.m_flags & EXT4_MAP_MAPPED) { + iomap->type = IOMAP_MAPPED; + } else if (map.m_flags & EXT4_MAP_UNWRITTEN) { + iomap->type = IOMAP_UNWRITTEN; + } else { + WARN_ON_ONCE(1); + return -EIO; + } + iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); + iomap->length = (u64)map.m_len << blkbits; + } + + if (map.m_flags & EXT4_MAP_NEW) + iomap->flags |= IOMAP_F_NEW; return 0; } -#else -/* Just define empty function, it will never get called. */ -int ext4_dax_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) + +static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, + ssize_t written, unsigned flags, struct iomap *iomap) { - BUG(); - return 0; + int ret = 0; + handle_t *handle; + int blkbits = inode->i_blkbits; + bool truncate = false; + + if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) + return 0; + + handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto orphan_del; + } + if (ext4_update_inode_size(inode, offset + written)) + ext4_mark_inode_dirty(handle, inode); + /* + * We may need to truncate allocated but not written blocks beyond EOF. + */ + if (iomap->offset + iomap->length > + ALIGN(inode->i_size, 1 << blkbits)) { + ext4_lblk_t written_blk, end_blk; + + written_blk = (offset + written) >> blkbits; + end_blk = (offset + length) >> blkbits; + if (written_blk < end_blk && ext4_can_truncate(inode)) + truncate = true; + } + /* + * Remove inode from orphan list if we were extending a inode and + * everything went fine. + */ + if (!truncate && inode->i_nlink && + !list_empty(&EXT4_I(inode)->i_orphan)) + ext4_orphan_del(handle, inode); + ext4_journal_stop(handle); + if (truncate) { + ext4_truncate_failed_write(inode); +orphan_del: + /* + * If truncate failed early the inode might still be on the + * orphan list; we need to make sure the inode is removed from + * the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + return ret; } + +const struct iomap_ops ext4_iomap_ops = { + .iomap_begin = ext4_iomap_begin, + .iomap_end = ext4_iomap_end, +}; + #endif static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, @@ -3436,20 +3576,8 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) iocb->private = NULL; if (overwrite) get_block_func = ext4_dio_get_block_overwrite; - else if (IS_DAX(inode)) { - /* - * We can avoid zeroing for aligned DAX writes beyond EOF. Other - * writes need zeroing either because they can race with page - * faults or because they use partial blocks. - */ - if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size && - ext4_aligned_io(inode, offset, count)) - get_block_func = ext4_dio_get_block; - else - get_block_func = ext4_dax_get_block; - dio_flags = DIO_LOCKING; - } else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || - round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) { + else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || + round_down(offset, i_blocksize(inode)) >= inode->i_size) { get_block_func = ext4_dio_get_block; dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; } else if (is_sync_kiocb(iocb)) { @@ -3462,14 +3590,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) #ifdef CONFIG_EXT4_FS_ENCRYPTION BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)); #endif - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, iter, get_block_func, - ext4_end_io_dio, dio_flags); - } else - ret = __blockdev_direct_IO(iocb, inode, - inode->i_sb->s_bdev, iter, - get_block_func, - ext4_end_io_dio, NULL, dio_flags); + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, + get_block_func, ext4_end_io_dio, NULL, + dio_flags); if (ret > 0 && !overwrite && ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN)) { @@ -3538,6 +3661,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; /* @@ -3546,19 +3670,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) * we are protected against page writeback as well. */ inode_lock_shared(inode); - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); - } else { - size_t count = iov_iter_count(iter); - - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, - iocb->ki_pos + count); - if (ret) - goto out_unlock; - ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, - iter, ext4_dio_get_block, - NULL, NULL, 0); - } + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, + iocb->ki_pos + count); + if (ret) + goto out_unlock; + ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, + iter, ext4_dio_get_block, NULL, NULL, 0); out_unlock: inode_unlock_shared(inode); return ret; @@ -3587,6 +3704,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (ext4_has_inline_data(inode)) return 0; + /* DAX uses iomap path now */ + if (WARN_ON_ONCE(IS_DAX(inode))) + return 0; + trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); if (iov_iter_rw(iter) == READ) ret = ext4_direct_IO_read(iocb, iter); @@ -3615,6 +3736,13 @@ static int ext4_journalled_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } +static int ext4_set_page_dirty(struct page *page) +{ + WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); + WARN_ON_ONCE(!page_has_buffers(page)); + return __set_page_dirty_buffers(page); +} + static const struct address_space_operations ext4_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, @@ -3622,6 +3750,7 @@ static const struct address_space_operations ext4_aops = { .writepages = ext4_writepages, .write_begin = ext4_write_begin, .write_end = ext4_write_end, + .set_page_dirty = ext4_set_page_dirty, .bmap = ext4_bmap, .invalidatepage = ext4_invalidatepage, .releasepage = ext4_releasepage, @@ -3654,6 +3783,7 @@ static const struct address_space_operations ext4_da_aops = { .writepages = ext4_writepages, .write_begin = ext4_da_write_begin, .write_end = ext4_da_write_end, + .set_page_dirty = ext4_set_page_dirty, .bmap = ext4_bmap, .invalidatepage = ext4_da_invalidatepage, .releasepage = ext4_releasepage, @@ -3743,7 +3873,8 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); - WARN_ON_ONCE(fscrypt_decrypt_page(page)); + WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, + page, PAGE_SIZE, 0, page->index)); } } if (ext4_should_journal_data(inode)) { @@ -3792,8 +3923,10 @@ static int ext4_block_zero_page_range(handle_t *handle, if (length > max || length < 0) length = max; - if (IS_DAX(inode)) - return dax_zero_page_range(inode, from, length, ext4_get_block); + if (IS_DAX(inode)) { + return iomap_zero_range(inode, from, length, NULL, + &ext4_iomap_ops); + } return __ext4_block_zero_page_range(handle, mapping, from, length); } @@ -3811,6 +3944,10 @@ static int ext4_block_truncate_page(handle_t *handle, unsigned blocksize; struct inode *inode = mapping->host; + /* If we are processing an encrypted inode during orphan list handling */ + if (ext4_encrypted_inode(inode) && !fscrypt_has_encryption_key(inode)) + return 0; + blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1)); @@ -4026,7 +4163,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) if (IS_SYNC(inode)) ext4_handle_sync(handle); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); out_stop: ext4_journal_stop(handle); @@ -4091,10 +4228,11 @@ int ext4_inode_attach_jinode(struct inode *inode) * that's fine - as long as they are linked from the inode, the post-crash * ext4_truncate() run will find them and release them. */ -void ext4_truncate(struct inode *inode) +int ext4_truncate(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); unsigned int credits; + int err = 0; handle_t *handle; struct address_space *mapping = inode->i_mapping; @@ -4108,7 +4246,7 @@ void ext4_truncate(struct inode *inode) trace_ext4_truncate_enter(inode); if (!ext4_can_truncate(inode)) - return; + return 0; ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); @@ -4118,15 +4256,17 @@ void ext4_truncate(struct inode *inode) if (ext4_has_inline_data(inode)) { int has_inline = 1; - ext4_inline_data_truncate(inode, &has_inline); + err = ext4_inline_data_truncate(inode, &has_inline); + if (err) + return err; if (has_inline) - return; + return 0; } /* If we zero-out tail of the page, we have to create jinode for jbd2 */ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { if (ext4_inode_attach_jinode(inode) < 0) - return; + return 0; } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4135,10 +4275,8 @@ void ext4_truncate(struct inode *inode) credits = ext4_blocks_for_truncate(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); - if (IS_ERR(handle)) { - ext4_std_error(inode->i_sb, PTR_ERR(handle)); - return; - } + if (IS_ERR(handle)) + return PTR_ERR(handle); if (inode->i_size & (inode->i_sb->s_blocksize - 1)) ext4_block_truncate_page(handle, mapping, inode->i_size); @@ -4152,7 +4290,8 @@ void ext4_truncate(struct inode *inode) * Implication: the file must always be in a sane, consistent * truncatable state while each transaction commits. */ - if (ext4_orphan_add(handle, inode)) + err = ext4_orphan_add(handle, inode); + if (err) goto out_stop; down_write(&EXT4_I(inode)->i_data_sem); @@ -4160,11 +4299,13 @@ void ext4_truncate(struct inode *inode) ext4_discard_preallocations(inode); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ext4_ext_truncate(handle, inode); + err = ext4_ext_truncate(handle, inode); else ext4_ind_truncate(handle, inode); up_write(&ei->i_data_sem); + if (err) + goto out_stop; if (IS_SYNC(inode)) ext4_handle_sync(handle); @@ -4180,11 +4321,12 @@ out_stop: if (inode->i_nlink) ext4_orphan_del(handle, inode); - inode->i_mtime = inode->i_ctime = ext4_current_time(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); trace_ext4_truncate_exit(inode); + return err; } /* @@ -4352,7 +4494,9 @@ void ext4_set_inode_flags(struct inode *inode) new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) new_fl |= S_DIRSYNC; - if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) + if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && + !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && + !ext4_encrypted_inode(inode)) new_fl |= S_DAX; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); @@ -4411,7 +4555,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode, { __le32 *magic = (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= + EXT4_INODE_SIZE(inode->i_sb) && + *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); ext4_find_inline_data_nolock(inode); } else @@ -4434,6 +4580,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) struct inode *inode; journal_t *journal = EXT4_SB(sb)->s_journal; long ret; + loff_t size; int block; uid_t i_uid; gid_t i_gid; @@ -4456,10 +4603,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT4_INODE_SIZE(inode->i_sb)) { - EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", - EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, - EXT4_INODE_SIZE(inode->i_sb)); + EXT4_INODE_SIZE(inode->i_sb) || + (ei->i_extra_isize & 3)) { + EXT4_ERROR_INODE(inode, + "bad extra_isize %u (inode size %u)", + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); ret = -EFSCORRUPTED; goto bad_inode; } @@ -4534,6 +4683,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl |= ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; inode->i_size = ext4_isize(raw_inode); + if ((size = i_size_read(inode)) < 0) { + EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -4577,6 +4731,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ + BUILD_BUG_ON(sizeof(struct ext4_inode) & 3); ei->i_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; } else { @@ -5024,7 +5179,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * do. We do the check mainly to optimize the common PAGE_SIZE == * blocksize case */ - if (offset > PAGE_SIZE - (1 << inode->i_blkbits)) + if (offset > PAGE_SIZE - i_blocksize(inode)) return; while (1) { page = find_lock_page(inode->i_mapping, @@ -5078,6 +5233,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) int orphan = 0; const unsigned int ia_valid = attr->ia_valid; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + error = setattr_prepare(dentry, attr); if (error) return error; @@ -5154,7 +5312,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) * update c/mtime in shrink case below */ if (!shrink) { - inode->i_mtime = ext4_current_time(inode); + inode->i_mtime = current_time(inode); inode->i_ctime = inode->i_mtime; } down_write(&EXT4_I(inode)->i_data_sem); @@ -5199,12 +5357,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) * in data=journal mode to make pages freeable. */ truncate_pagecache(inode, inode->i_size); - if (shrink) - ext4_truncate(inode); + if (shrink) { + rc = ext4_truncate(inode); + if (rc) + error = rc; + } up_write(&EXT4_I(inode)->i_mmap_sem); } - if (!rc) { + if (!error) { setattr_copy(inode, attr); mark_inode_dirty(inode); } @@ -5216,7 +5377,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (orphan && inode->i_nlink) ext4_orphan_del(NULL, inode); - if (!rc && (ia_valid & ATTR_MODE)) + if (!error && (ia_valid & ATTR_MODE)) rc = posix_acl_chmod(inode, inode->i_mode); err_out: @@ -5226,20 +5387,55 @@ err_out: return error; } -int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ext4_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode; - unsigned long long delalloc_blocks; + struct inode *inode = d_inode(path->dentry); + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned int flags; + + if (EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { + stat->result_mask |= STATX_BTIME; + stat->btime.tv_sec = ei->i_crtime.tv_sec; + stat->btime.tv_nsec = ei->i_crtime.tv_nsec; + } + + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; + if (flags & EXT4_APPEND_FL) + stat->attributes |= STATX_ATTR_APPEND; + if (flags & EXT4_COMPR_FL) + stat->attributes |= STATX_ATTR_COMPRESSED; + if (flags & EXT4_ENCRYPT_FL) + stat->attributes |= STATX_ATTR_ENCRYPTED; + if (flags & EXT4_IMMUTABLE_FL) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (flags & EXT4_NODUMP_FL) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= (STATX_ATTR_APPEND | + STATX_ATTR_COMPRESSED | + STATX_ATTR_ENCRYPTED | + STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP); - inode = d_inode(dentry); generic_fillattr(inode, stat); + return 0; +} + +int ext4_file_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + u64 delalloc_blocks; + + ext4_getattr(path, stat, request_mask, query_flags); /* * If there is inline data in the inode, the inode will normally not * have data blocks allocated (it may have an external xattr block). * Report at least one sector for such files, so tools like tar, rsync, - * others doen't incorrectly think the file is completely sparse. + * others don't incorrectly think the file is completely sparse. */ if (unlikely(ext4_has_inline_data(inode))) stat->blocks += (stat->size + 511) >> 9; @@ -5361,6 +5557,9 @@ int ext4_mark_iloc_dirty(handle_t *handle, { int err = 0; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + if (IS_I_VERSION(inode)) inode_inc_iversion(inode); @@ -5384,6 +5583,9 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode, { int err; + if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) + return -EIO; + err = ext4_get_inode_loc(inode, iloc); if (!err) { BUFFER_TRACE(iloc->bh, "get_write_access"); @@ -5455,18 +5657,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) return err; - if (ext4_handle_valid(handle) && - EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && + if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { /* - * We need extra buffer credits since we may write into EA block + * In nojournal mode, we can immediately attempt to expand + * the inode. When journaled, we first need to obtain extra + * buffer credits since we may write into the EA block * with this same handle. If journal_extend fails, then it will * only result in a minor loss of functionality for that inode. * If this is felt to be critical, then e2fsck should be run to * force a large enough s_min_extra_isize. */ - if ((jbd2_journal_extend(handle, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { + if (!ext4_handle_valid(handle) || + jbd2_journal_extend(handle, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) { ret = ext4_expand_extra_isize(inode, sbi->s_want_extra_isize, iloc, handle); @@ -5620,6 +5824,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); } ext4_set_aops(inode); + /* + * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated. + * E.g. S_DAX may get cleared / set. + */ + ext4_set_inode_flags(inode); jbd2_journal_unlock_updates(journal); percpu_up_write(&sbi->s_journal_flag_rwsem); @@ -5647,8 +5856,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) return !buffer_mapped(bh); } -int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_page_mkwrite(struct vm_fault *vmf) { + struct vm_area_struct *vma = vmf->vma; struct page *page = vmf->page; loff_t size; unsigned long len; @@ -5738,13 +5948,13 @@ out: return ret; } -int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int ext4_filemap_fault(struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int err; down_read(&EXT4_I(inode)->i_mmap_sem); - err = filemap_fault(vma, vmf); + err = filemap_fault(vmf); up_read(&EXT4_I(inode)->i_mmap_sem); return err; |