From 68a9f5e7007c1afa2cf6830b690a90d0187c0684 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jun 2016 09:53:44 +1000 Subject: xfs: implement iomap based buffered write path Convert XFS to use the new iomap based multipage write path. This involves implementing the ->iomap_begin and ->iomap_end methods, and switching the buffered file write, page_mkwrite and xfs_iozero paths to the new iomap helpers. With this change __xfs_get_blocks will never be used for buffered writes, and the code handling them can be removed. Based on earlier code from Dave Chinner. Signed-off-by: Christoph Hellwig Reviewed-by: Bob Peterson Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 71 +++++++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 41 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 47fc63295422..7316d3841c53 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -37,6 +37,7 @@ #include "xfs_log.h" #include "xfs_icache.h" #include "xfs_pnfs.h" +#include "xfs_iomap.h" #include #include @@ -79,57 +80,27 @@ xfs_rw_ilock_demote( inode_unlock(VFS_I(ip)); } -/* - * xfs_iozero clears the specified range supplied via the page cache (except in - * the DAX case). Writes through the page cache will allocate blocks over holes, - * though the callers usually map the holes first and avoid them. If a block is - * not completely zeroed, then it will be read from disk before being partially - * zeroed. - * - * In the DAX case, we can just directly write to the underlying pages. This - * will not allocate blocks, but will avoid holes and unwritten extents and so - * not do unnecessary work. - */ -int -xfs_iozero( - struct xfs_inode *ip, /* inode */ - loff_t pos, /* offset in file */ - size_t count) /* size of data to zero */ +static int +xfs_dax_zero_range( + struct inode *inode, + loff_t pos, + size_t count) { - struct page *page; - struct address_space *mapping; int status = 0; - - mapping = VFS_I(ip)->i_mapping; do { unsigned offset, bytes; - void *fsdata; offset = (pos & (PAGE_SIZE -1)); /* Within page */ bytes = PAGE_SIZE - offset; if (bytes > count) bytes = count; - if (IS_DAX(VFS_I(ip))) { - status = dax_zero_page_range(VFS_I(ip), pos, bytes, - xfs_get_blocks_direct); - if (status) - break; - } else { - status = pagecache_write_begin(NULL, mapping, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); - if (status) - break; - - zero_user(page, offset, bytes); + status = dax_zero_page_range(inode, pos, bytes, + xfs_get_blocks_direct); + if (status) + break; - status = pagecache_write_end(NULL, mapping, pos, bytes, - bytes, page, fsdata); - WARN_ON(status <= 0); /* can't return less than zero! */ - status = 0; - } pos += bytes; count -= bytes; } while (count); @@ -137,6 +108,24 @@ xfs_iozero( return status; } +/* + * Clear the specified ranges to zero through either the pagecache or DAX. + * Holes and unwritten extents will be left as-is as they already are zeroed. 
+ */ +int +xfs_iozero( + struct xfs_inode *ip, + loff_t pos, + size_t count) +{ + struct inode *inode = VFS_I(ip); + + if (IS_DAX(VFS_I(ip))) + return xfs_dax_zero_range(inode, pos, count); + else + return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops); +} + int xfs_update_prealloc_flags( struct xfs_inode *ip, @@ -841,7 +830,7 @@ xfs_file_buffered_aio_write( write_retry: trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos, 0); - ret = generic_perform_write(file, from, iocb->ki_pos); + ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); if (likely(ret >= 0)) iocb->ki_pos += ret; @@ -1553,7 +1542,7 @@ xfs_filemap_page_mkwrite( if (IS_DAX(inode)) { ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault); } else { - ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); + ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); } -- cgit v1.2.3 From 459f0fbc2a827da37bbfaf3cae8da4ad8884da12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jun 2016 09:55:18 +1000 Subject: xfs: use iomap infrastructure for DAX zeroing Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 35 +---------------------------------- fs/xfs/xfs_iops.c | 9 ++------- 2 files changed, 3 insertions(+), 41 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7316d3841c53..090a90f0d02c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -80,34 +80,6 @@ xfs_rw_ilock_demote( inode_unlock(VFS_I(ip)); } -static int -xfs_dax_zero_range( - struct inode *inode, - loff_t pos, - size_t count) -{ - int status = 0; - - do { - unsigned offset, bytes; - - offset = (pos & (PAGE_SIZE -1)); /* Within page */ - bytes = PAGE_SIZE - offset; - if (bytes > count) - bytes = count; - - status = dax_zero_page_range(inode, pos, bytes, - xfs_get_blocks_direct); - if (status) - break; - - pos += bytes; - count -= bytes; - } while (count); - - return status; -} - /* * Clear the specified ranges to zero through either the pagecache or DAX. * Holes and unwritten extents will be left as-is as they already are zeroed. @@ -118,12 +90,7 @@ xfs_iozero( loff_t pos, size_t count) { - struct inode *inode = VFS_I(ip); - - if (IS_DAX(VFS_I(ip))) - return xfs_dax_zero_range(inode, pos, count); - else - return iomap_zero_range(inode, pos, count, NULL, &xfs_iomap_ops); + return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); } int diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 985a263c6144..ab820f84ed50 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -819,13 +819,8 @@ xfs_setattr_size( if (newsize > oldsize) { error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); } else { - if (IS_DAX(inode)) { - error = dax_truncate_page(inode, newsize, - xfs_get_blocks_direct); - } else { - error = iomap_truncate_page(inode, newsize, - &did_zeroing, &xfs_iomap_ops); - } + error = iomap_truncate_page(inode, newsize, &did_zeroing, + &xfs_iomap_ops); } if (error) -- cgit v1.2.3 From 7bb41db3ea160ea55cc46af07e45f7cb1e2968ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jun 2016 09:56:26 +1000 Subject: xfs: handle 64-bit length in xfs_iozero We'll want to use this code for large offsets now that we're skipping holes and unwritten extents efficiently. Also rename it to xfs_zero_range to be a bit more descriptive, and tell the caller if we actually did any zeroing. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 11 ++++++----- fs/xfs/xfs_inode.h | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 090a90f0d02c..294e5f423028 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -85,10 +85,11 @@ xfs_rw_ilock_demote( * Holes and unwritten extents will be left as-is as they already are zeroed. */ int -xfs_iozero( +xfs_zero_range( struct xfs_inode *ip, - loff_t pos, - size_t count) + xfs_off_t pos, + xfs_off_t count, + bool *did_zero) { return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); } @@ -419,7 +420,7 @@ xfs_zero_last_block( if (isize + zero_len > offset) zero_len = offset - isize; *did_zeroing = true; - return xfs_iozero(ip, isize, zero_len); + return xfs_zero_range(ip, isize, zero_len, NULL); } /* @@ -518,7 +519,7 @@ xfs_zero_eof( if ((zero_off + zero_len) > offset) zero_len = offset - zero_off; - error = xfs_iozero(ip, zero_off, zero_len); + error = xfs_zero_range(ip, zero_off, zero_len, NULL); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e52d7c7aeb5b..dbb0bcfc7d8f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -434,7 +434,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); -int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); +int xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count, + bool *did_zero); loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, loff_t eof, int whence); -- cgit v1.2.3 From 570b6211b85692f408cbe47664ab2378eb9519ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Jun 2016 09:57:26 +1000 Subject: xfs: use xfs_zero_range in xfs_zero_eof We now skip holes in it, so no need to have the caller do it as well. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 128 +----------------------------------------------------- 1 file changed, 1 insertion(+), 127 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 294e5f423028..713991c22781 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -380,49 +380,6 @@ out: return ret; } -/* - * This routine is called to handle zeroing any space in the last block of the - * file that is beyond the EOF. We do this since the size is being increased - * without writing anything to that block and we don't want to read the - * garbage on the disk. - */ -STATIC int /* error (positive) */ -xfs_zero_last_block( - struct xfs_inode *ip, - xfs_fsize_t offset, - xfs_fsize_t isize, - bool *did_zeroing) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); - int zero_offset = XFS_B_FSB_OFFSET(mp, isize); - int zero_len; - int nimaps = 1; - int error = 0; - struct xfs_bmbt_irec imap; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - return error; - - ASSERT(nimaps > 0); - - /* - * If the block underlying isize is just a hole, then there - * is nothing to zero. 
- */ - if (imap.br_startblock == HOLESTARTBLOCK) - return 0; - - zero_len = mp->m_sb.sb_blocksize - zero_offset; - if (isize + zero_len > offset) - zero_len = offset - isize; - *did_zeroing = true; - return xfs_zero_range(ip, isize, zero_len, NULL); -} - /* * Zero any on disk space between the current EOF and the new, larger EOF. * @@ -441,94 +398,11 @@ xfs_zero_eof( xfs_fsize_t isize, /* current inode size */ bool *did_zeroing) { - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_zero_fsb; - xfs_fileoff_t end_zero_fsb; - xfs_fileoff_t zero_count_fsb; - xfs_fileoff_t last_fsb; - xfs_fileoff_t zero_off; - xfs_fsize_t zero_len; - int nimaps; - int error = 0; - struct xfs_bmbt_irec imap; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(offset > isize); trace_xfs_zero_eof(ip, isize, offset - isize); - - /* - * First handle zeroing the block on which isize resides. - * - * We only zero a part of that block so it is handled specially. - */ - if (XFS_B_FSB_OFFSET(mp, isize) != 0) { - error = xfs_zero_last_block(ip, offset, isize, did_zeroing); - if (error) - return error; - } - - /* - * Calculate the range between the new size and the old where blocks - * needing to be zeroed may exist. - * - * To get the block where the last byte in the file currently resides, - * we need to subtract one from the size and truncate back to a block - * boundary. We subtract 1 in case the size is exactly on a block - * boundary. - */ - last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; - start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); - end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); - ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); - if (last_fsb == end_zero_fsb) { - /* - * The size was only incremented on its last block. - * We took care of that above, so just return. - */ - return 0; - } - - ASSERT(start_zero_fsb <= end_zero_fsb); - while (start_zero_fsb <= end_zero_fsb) { - nimaps = 1; - zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb, - &imap, &nimaps, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - return error; - - ASSERT(nimaps > 0); - - if (imap.br_state == XFS_EXT_UNWRITTEN || - imap.br_startblock == HOLESTARTBLOCK) { - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - continue; - } - - /* - * There are blocks we need to zero. - */ - zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); - zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); - - if ((zero_off + zero_len) > offset) - zero_len = offset - zero_off; - - error = xfs_zero_range(ip, zero_off, zero_len, NULL); - if (error) - return error; - - *did_zeroing = true; - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - } - - return 0; + return xfs_zero_range(ip, isize, offset - isize, did_zeroing); } /* -- cgit v1.2.3 From 3176c3e0ef32963aa5f6f9754142e420a4ba5d64 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:31:42 +1000 Subject: xfs: kill ioflags Now that we have the direct I/O kiocb flag there is no real need to sample the value inside of XFS, and the invis flag was always just partially used and isn't worth keeping this infrastructure around for. This also splits the read tracepoint into buffered vs direct as we've done for writes a long time ago. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 26 +++++++++----------------- fs/xfs/xfs_inode.h | 10 ---------- fs/xfs/xfs_trace.h | 19 ++++++++----------- 3 files changed, 17 insertions(+), 38 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 47fc63295422..e51622c8e482 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -292,18 +292,12 @@ xfs_file_read_iter( struct xfs_mount *mp = ip->i_mount; size_t size = iov_iter_count(to); ssize_t ret = 0; - int ioflags = 0; xfs_fsize_t n; loff_t pos = iocb->ki_pos; XFS_STATS_INC(mp, xs_read_calls); - if (unlikely(iocb->ki_flags & IOCB_DIRECT)) - ioflags |= XFS_IO_ISDIRECT; - if (file->f_mode & FMODE_NOCMTIME) - ioflags |= XFS_IO_INVIS; - - if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) { + if ((iocb->ki_flags & IOCB_DIRECT) && !IS_DAX(inode)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -336,7 +330,7 @@ xfs_file_read_iter( * serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) { + if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); @@ -370,7 +364,10 @@ xfs_file_read_iter( xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } - trace_xfs_file_read(ip, size, pos, ioflags); + if (iocb->ki_flags & IOCB_DIRECT) + trace_xfs_file_direct_read(ip, size, pos); + else + trace_xfs_file_buffered_read(ip, size, pos); ret = generic_file_read_iter(iocb, to); if (ret > 0) @@ -389,18 +386,14 @@ xfs_file_splice_read( unsigned int flags) { struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - int ioflags = 0; ssize_t ret; XFS_STATS_INC(ip->i_mount, xs_read_calls); - if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= XFS_IO_INVIS; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); + trace_xfs_file_splice_read(ip, count, *ppos); /* * DAX inodes cannot ues the page cache for splice, so we have to push @@ -789,7 +782,7 @@ xfs_file_dio_aio_write( iolock = XFS_IOLOCK_SHARED; } - trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); + trace_xfs_file_direct_write(ip, count, iocb->ki_pos); data = *from; ret = mapping->a_ops->direct_IO(iocb, &data); @@ -839,8 +832,7 @@ xfs_file_buffered_aio_write( current->backing_dev_info = inode_to_bdi(inode); write_retry: - trace_xfs_file_buffered_write(ip, iov_iter_count(from), - iocb->ki_pos, 0); + trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); ret = generic_perform_write(file, from, iocb->ki_pos); if (likely(ret >= 0)) iocb->ki_pos += ret; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index e52d7c7aeb5b..57b66d2bbc45 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -479,14 +479,4 @@ do { \ extern struct kmem_zone *xfs_inode_zone; -/* - * Flags for read/write calls - */ -#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */ -#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */ - -#define XFS_IO_FLAGS \ - { XFS_IO_ISDIRECT, "DIRECT" }, \ - { XFS_IO_INVIS, "INVIS"} - #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ea94ee0fe5ea..a1bc5c64a573 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1134,15 +1134,14 @@ TRACE_EVENT(xfs_log_assign_tail_lsn, ) DECLARE_EVENT_CLASS(xfs_file_class, - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), - 
TP_ARGS(ip, count, offset, flags), + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset), + TP_ARGS(ip, count, offset), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_fsize_t, size) __field(loff_t, offset) __field(size_t, count) - __field(int, flags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; @@ -1150,23 +1149,21 @@ DECLARE_EVENT_CLASS(xfs_file_class, __entry->size = ip->i_d.di_size; __entry->offset = offset; __entry->count = count; - __entry->flags = flags; ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx " - "offset 0x%llx count 0x%zx ioflags %s", + TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->size, __entry->offset, - __entry->count, - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) + __entry->count) ) #define DEFINE_RW_EVENT(name) \ DEFINE_EVENT(xfs_file_class, name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags)) -DEFINE_RW_EVENT(xfs_file_read); + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset), \ + TP_ARGS(ip, count, offset)) +DEFINE_RW_EVENT(xfs_file_buffered_read); +DEFINE_RW_EVENT(xfs_file_direct_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); -- cgit v1.2.3 From cf810712cc82cbfab8f08a46ca6c0289d386a303 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:31:53 +1000 Subject: xfs: remove s_maxbytes enforcement in xfs_file_read_iter All the three low-level read implementations that we might call already take care of not overflowing the maximum supported bytes, no need to duplicate it here. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e51622c8e482..7ec8225b7fd2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -292,7 +292,6 @@ xfs_file_read_iter( struct xfs_mount *mp = ip->i_mount; size_t size = iov_iter_count(to); ssize_t ret = 0; - xfs_fsize_t n; loff_t pos = iocb->ki_pos; XFS_STATS_INC(mp, xs_read_calls); @@ -309,13 +308,6 @@ xfs_file_read_iter( } } - n = mp->m_super->s_maxbytes - pos; - if (n <= 0 || size == 0) - return 0; - - if (n < size) - size = n; - if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; -- cgit v1.2.3 From bbc5a740c4f27a9732a3a3decf3186b4bce21108 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:35:42 +1000 Subject: xfs: split xfs_file_read_iter into buffered and direct I/O helpers Similar to what we did on the write side a while ago. 
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 83 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 26 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 7ec8225b7fd2..fdb123ffd616 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -282,35 +282,33 @@ xfs_file_fsync( } STATIC ssize_t -xfs_file_read_iter( +xfs_file_dio_aio_read( struct kiocb *iocb, struct iov_iter *to) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - size_t size = iov_iter_count(to); + size_t count = iov_iter_count(to); + struct xfs_buftarg *target; ssize_t ret = 0; - loff_t pos = iocb->ki_pos; - XFS_STATS_INC(mp, xs_read_calls); + trace_xfs_file_direct_read(ip, count, iocb->ki_pos); - if ((iocb->ki_flags & IOCB_DIRECT) && !IS_DAX(inode)) { - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; + if (XFS_IS_REALTIME_INODE(ip)) + target = ip->i_mount->m_rtdev_targp; + else + target = ip->i_mount->m_ddev_targp; + + if (!IS_DAX(inode)) { /* DIO must be aligned to device logical sector size */ - if ((pos | size) & target->bt_logical_sectormask) { - if (pos == i_size_read(inode)) + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { + if (iocb->ki_pos == i_size_read(inode)) return 0; return -EINVAL; } } - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - /* * Locking is a bit tricky here. If we take an exclusive lock for direct * IO, we effectively serialise all new concurrent read IO to this file @@ -322,7 +320,7 @@ xfs_file_read_iter( * serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - if ((iocb->ki_flags & IOCB_DIRECT) && inode->i_mapping->nrpages) { + if (mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); @@ -337,8 +335,8 @@ xfs_file_read_iter( * flush and reduce the chances of repeated iolock cycles going * forward. */ - if (inode->i_mapping->nrpages) { - ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); + if (mapping->nrpages) { + ret = filemap_write_and_wait(mapping); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; @@ -349,23 +347,56 @@ xfs_file_read_iter( * we fail to invalidate a page, but this should never * happen on XFS. Warn if it does fail. 
 */
-		ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+		ret = invalidate_inode_pages2(mapping);
 			WARN_ON_ONCE(ret);
 			ret = 0;
 		}
 		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
 	}
+
+	ret = generic_file_read_iter(iocb, to);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_buffered_aio_read(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
+	ssize_t			ret;
+
+	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
+
+	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
+	ret = generic_file_read_iter(iocb, to);
+	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
+
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_read_iter(
+	struct kiocb		*iocb,
+	struct iov_iter		*to)
+{
+	struct xfs_mount	*mp = XFS_I(file_inode(iocb->ki_filp))->i_mount;
+	ssize_t			ret = 0;
+
+	XFS_STATS_INC(mp, xs_read_calls);
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
 	if (iocb->ki_flags & IOCB_DIRECT)
-		trace_xfs_file_direct_read(ip, size, pos);
+		ret = xfs_file_dio_aio_read(iocb, to);
 	else
-		trace_xfs_file_buffered_read(ip, size, pos);
+		ret = xfs_file_buffered_aio_read(iocb, to);
 
-	ret = generic_file_read_iter(iocb, to);
 	if (ret > 0)
 		XFS_STATS_ADD(mp, xs_read_bytes, ret);
-
-	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
 	return ret;
 }
 
@@ -747,7 +778,7 @@ xfs_file_dio_aio_write(
 	end = iocb->ki_pos + count - 1;
 
 	/*
-	 * See xfs_file_read_iter() for why we do a full-file flush here.
+	 * See xfs_file_dio_aio_read() for why we do a full-file flush here.
 	 */
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
--
cgit v1.2.3

From f1285ff0acf9040a39921355d07bd83a3308c402 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 20 Jul 2016 11:36:57 +1000
Subject: xfs: stop using generic_file_read_iter for direct I/O

XFS already implements its own flushing of the pagecache because it
implements proper synchronization for direct I/O reads. This means
calling generic_file_read_iter for direct I/O is rather useless, as it
doesn't do much but update the atime and iocb position for us. This
also gets rid of the buffered I/O fallback that isn't used for XFS.
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index fdb123ffd616..440bb8b5c64d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -289,12 +289,17 @@ xfs_file_dio_aio_read( struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); + loff_t isize = i_size_read(inode); size_t count = iov_iter_count(to); + struct iov_iter data; struct xfs_buftarg *target; ssize_t ret = 0; trace_xfs_file_direct_read(ip, count, iocb->ki_pos); + if (!count) + return 0; /* skip atime */ + if (XFS_IS_REALTIME_INODE(ip)) target = ip->i_mount->m_rtdev_targp; else @@ -303,7 +308,7 @@ xfs_file_dio_aio_read( if (!IS_DAX(inode)) { /* DIO must be aligned to device logical sector size */ if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { - if (iocb->ki_pos == i_size_read(inode)) + if (iocb->ki_pos == isize) return 0; return -EINVAL; } @@ -354,9 +359,15 @@ xfs_file_dio_aio_read( xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } - ret = generic_file_read_iter(iocb, to); + data = *to; + ret = mapping->a_ops->direct_IO(iocb, &data); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(to, ret); + } xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + file_accessed(iocb->ki_filp); return ret; } -- cgit v1.2.3 From fa8d972d055c723cc427e14d4d7919640f418730 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 20 Jul 2016 11:38:01 +1000 Subject: xfs: direct calls in the direct I/O path We control both the callers and callees of ->direct_IO, so remove the indirect calls. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 24 +++++------------------- fs/xfs/xfs_aops.h | 3 +++ fs/xfs/xfs_file.c | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 21 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4c463b99fe57..3ba0809e0be8 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1336,7 +1336,7 @@ xfs_get_blocks_dax_fault( * whereas if we have flags set we will always be called in task context * (i.e. from a workqueue). */ -STATIC int +int xfs_end_io_direct_write( struct kiocb *iocb, loff_t offset, @@ -1407,24 +1407,10 @@ xfs_vm_direct_IO( struct kiocb *iocb, struct iov_iter *iter) { - struct inode *inode = iocb->ki_filp->f_mapping->host; - dio_iodone_t *endio = NULL; - int flags = 0; - struct block_device *bdev; - - if (iov_iter_rw(iter) == WRITE) { - endio = xfs_end_io_direct_write; - flags = DIO_ASYNC_EXTEND; - } - - if (IS_DAX(inode)) { - return dax_do_io(iocb, inode, iter, - xfs_get_blocks_direct, endio, 0); - } - - bdev = xfs_find_bdev_for_inode(inode); - return __blockdev_direct_IO(iocb, inode, bdev, iter, - xfs_get_blocks_direct, endio, NULL, flags); + /* + * We just need the method present so that open/fcntl allow direct I/O. 
+ */
+	return -EINVAL;
+}
 
 /*
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 814aab790713..bf2d9a141a73 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -60,6 +60,9 @@ int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
 int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
 		 struct buffer_head *map_bh, int create);
 
+int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
+		ssize_t size, void *private);
+
 extern void xfs_count_page_state(struct page *, int *, int *);
 extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 440bb8b5c64d..dd5185dafc9f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -360,7 +360,13 @@ xfs_file_dio_aio_read(
 	}
 
 	data = *to;
-	ret = mapping->a_ops->direct_IO(iocb, &data);
+	if (IS_DAX(inode)) {
+		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+				NULL, 0);
+	} else {
+		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+				xfs_get_blocks_direct, NULL, NULL, 0);
+	}
 	if (ret > 0) {
 		iocb->ki_pos += ret;
 		iov_iter_advance(to, ret);
@@ -819,7 +825,14 @@ xfs_file_dio_aio_write(
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
 
 	data = *from;
-	ret = mapping->a_ops->direct_IO(iocb, &data);
+	if (IS_DAX(inode)) {
+		ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+				xfs_end_io_direct_write, 0);
+	} else {
+		ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
+				xfs_get_blocks_direct, xfs_end_io_direct_write,
+				NULL, DIO_ASYNC_EXTEND);
+	}
 
 	/* see generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
--
cgit v1.2.3

From 16d4d43595b4780daac8fcea6d042689124cb094 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Wed, 20 Jul 2016 11:38:55 +1000
Subject: xfs: split direct I/O and DAX path

So far the DAX code overloaded the direct I/O code path. There is very
little in common between the two, and untangling them allows us to
clean up both variants. As a side effect we also get separate trace
points for both I/O types.
Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 139 ++++++++++++++++++++++++++++++++++++++++++----------- fs/xfs/xfs_trace.h | 2 + 2 files changed, 112 insertions(+), 29 deletions(-) (limited to 'fs/xfs/xfs_file.c') diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index dd5185dafc9f..d97e8cb99a59 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -305,13 +305,11 @@ xfs_file_dio_aio_read( else target = ip->i_mount->m_ddev_targp; - if (!IS_DAX(inode)) { - /* DIO must be aligned to device logical sector size */ - if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { - if (iocb->ki_pos == isize) - return 0; - return -EINVAL; - } + /* DIO must be aligned to device logical sector size */ + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) { + if (iocb->ki_pos == isize) + return 0; + return -EINVAL; } /* @@ -360,13 +358,37 @@ xfs_file_dio_aio_read( } data = *to; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - NULL, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, NULL, NULL, 0); + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, NULL, NULL, 0); + if (ret > 0) { + iocb->ki_pos += ret; + iov_iter_advance(to, ret); } + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + + file_accessed(iocb->ki_filp); + return ret; +} + +STATIC ssize_t +xfs_file_dax_read( + struct kiocb *iocb, + struct iov_iter *to) +{ + struct address_space *mapping = iocb->ki_filp->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct iov_iter data = *to; + size_t count = iov_iter_count(to); + ssize_t ret = 0; + + trace_xfs_file_dax_read(ip, count, iocb->ki_pos); + + if (!count) + return 0; /* skip atime */ + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0); if (ret > 0) { iocb->ki_pos += ret; iov_iter_advance(to, ret); @@ -399,7 +421,8 @@ xfs_file_read_iter( struct kiocb *iocb, struct iov_iter *to) { - struct xfs_mount *mp = XFS_I(file_inode(iocb->ki_filp))->i_mount; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_mount *mp = XFS_I(inode)->i_mount; ssize_t ret = 0; XFS_STATS_INC(mp, xs_read_calls); @@ -407,7 +430,9 @@ xfs_file_read_iter( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if (iocb->ki_flags & IOCB_DIRECT) + if (IS_DAX(inode)) + ret = xfs_file_dax_read(iocb, to); + else if (iocb->ki_flags & IOCB_DIRECT) ret = xfs_file_dio_aio_read(iocb, to); else ret = xfs_file_buffered_aio_read(iocb, to); @@ -755,8 +780,7 @@ xfs_file_dio_aio_write( mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ - if (!IS_DAX(inode) && - ((iocb->ki_pos | count) & target->bt_logical_sectormask)) + if ((iocb->ki_pos | count) & target->bt_logical_sectormask) return -EINVAL; /* "unaligned" here means not aligned to a filesystem block */ @@ -825,14 +849,9 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); data = *from; - if (IS_DAX(inode)) { - ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, - xfs_end_io_direct_write, 0); - } else { - ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, - xfs_get_blocks_direct, xfs_end_io_direct_write, - NULL, DIO_ASYNC_EXTEND); - } + ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data, + xfs_get_blocks_direct, xfs_end_io_direct_write, + NULL, DIO_ASYNC_EXTEND); /* see 
generic_file_direct_write() for why this is necessary */
 	if (mapping->nrpages) {
@@ -849,10 +868,70 @@ out:
 	xfs_rw_iunlock(ip, iolock);
 
 	/*
-	 * No fallback to buffered IO on errors for XFS. DAX can result in
-	 * partial writes, but direct IO will either complete fully or fail.
+	 * No fallback to buffered IO on errors for XFS, direct IO will either
+	 * complete fully or fail.
+	 */
+	ASSERT(ret < 0 || ret == count);
+	return ret;
+}
+
+STATIC ssize_t
+xfs_file_dax_write(
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	struct address_space	*mapping = iocb->ki_filp->f_mapping;
+	struct inode		*inode = mapping->host;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	ssize_t			ret = 0;
+	int			unaligned_io = 0;
+	int			iolock;
+	struct iov_iter		data;
+
+	/* "unaligned" here means not aligned to a filesystem block */
+	if ((iocb->ki_pos & mp->m_blockmask) ||
+	    ((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
+		unaligned_io = 1;
+		iolock = XFS_IOLOCK_EXCL;
+	} else if (mapping->nrpages) {
+		iolock = XFS_IOLOCK_EXCL;
+	} else {
+		iolock = XFS_IOLOCK_SHARED;
+	}
+	xfs_rw_ilock(ip, iolock);
+
+	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
+	if (ret)
+		goto out;
+
+	/*
+	 * Yes, even DAX files can have page cache attached to them: A zeroed
+	 * page is inserted into the pagecache when we have to serve a write
+	 * fault on a hole. It should never be dirtied and can simply be
+	 * dropped from the pagecache once we get real data for the page.
 	 */
-	ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
+	if (mapping->nrpages) {
+		ret = invalidate_inode_pages2(mapping);
+		WARN_ON_ONCE(ret);
+	}
+
+	if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
+		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+		iolock = XFS_IOLOCK_SHARED;
+	}
+
+	trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
+
+	data = *from;
+	ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
+			xfs_end_io_direct_write, 0);
+	if (ret > 0) {
+		iocb->ki_pos += ret;
+		iov_iter_advance(from, ret);
+	}
+out:
+	xfs_rw_iunlock(ip, iolock);
 	return ret;
 }
 
@@ -934,7 +1013,9 @@ xfs_file_write_iter(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+	if (IS_DAX(inode))
+		ret = xfs_file_dax_write(iocb, from);
+	else if (iocb->ki_flags & IOCB_DIRECT)
 		ret = xfs_file_dio_aio_write(iocb, from);
 	else
 		ret = xfs_file_buffered_aio_write(iocb, from);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a1bc5c64a573..c2876917dd89 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1164,8 +1164,10 @@ DEFINE_EVENT(xfs_file_class, name, \
 	TP_ARGS(ip, count, offset))
 DEFINE_RW_EVENT(xfs_file_buffered_read);
 DEFINE_RW_EVENT(xfs_file_direct_read);
+DEFINE_RW_EVENT(xfs_file_dax_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
+DEFINE_RW_EVENT(xfs_file_dax_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
--
cgit v1.2.3

From f021bd071f06b545926b1031348873b05442139f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Fri, 22 Jul 2016 09:50:55 +1000
Subject: xfs: remove dax code from object file when disabled

We check IS_DAX(inode) before calling either xfs_file_dax_read or
xfs_file_dax_write, and this will lead to the call being optimized out
at compile time when CONFIG_FS_DAX is disabled.

However, the two functions are marked STATIC, so they become global
symbols when CONFIG_XFS_DEBUG is set, leaving us with two unused global
functions that call into an undefined function and a broken
"allmodconfig" build:

fs/built-in.o: In function `xfs_file_dax_read':
fs/xfs/xfs_file.c:348: undefined reference to `dax_do_io'
fs/built-in.o: In function `xfs_file_dax_write':
fs/xfs/xfs_file.c:758: undefined reference to `dax_do_io'

Marking the two functions 'static noinline' instead of 'STATIC' will
let the compiler drop the symbols when there are no callers but avoid
the implicit inlining.

Signed-off-by: Arnd Bergmann
Fixes: 16d4d43595b4 ("xfs: split direct I/O and DAX path")
Reviewed-by: Christoph Hellwig
Signed-off-by: Dave Chinner
---
 fs/xfs/xfs_file.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/xfs/xfs_file.c')

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d97e8cb99a59..49fc9aca92d5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -370,7 +370,7 @@ xfs_file_dio_aio_read(
 	return ret;
 }
 
-STATIC ssize_t
+static noinline ssize_t
 xfs_file_dax_read(
 	struct kiocb		*iocb,
 	struct iov_iter		*to)
@@ -875,7 +875,7 @@ out:
 	return ret;
 }
 
-STATIC ssize_t
+static noinline ssize_t
 xfs_file_dax_write(
 	struct kiocb		*iocb,
 	struct iov_iter		*from)
--
cgit v1.2.3
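
Editor's note: the diffs above are limited to fs/xfs/xfs_file.c, so the
->iomap_begin/->iomap_end methods that the first patch in this series relies
on (referenced via &xfs_iomap_ops in iomap_file_buffered_write,
iomap_zero_range and iomap_page_mkwrite) never appear here. The sketch below
illustrates the shape of that interface as of the 4.8-era iomap code. The
function names, prototypes and the empty bodies are assumptions for
illustration only; the real implementation lives in fs/xfs/xfs_iomap.c and
does the actual extent lookup and delayed allocation.

#include <linux/fs.h>
#include <linux/iomap.h>

/*
 * Sketch only: describe the extent backing [offset, offset + length) in
 * *iomap (hole, delalloc, unwritten or mapped), allocating delalloc blocks
 * when called for a write (IOMAP_WRITE in flags).
 */
static int xfs_file_iomap_begin(struct inode *inode, loff_t offset,
		loff_t length, unsigned flags, struct iomap *iomap)
{
	/* placeholder: fill in iomap from the inode's extent map */
	return 0;
}

/*
 * Sketch only: called after the generic code has used the mapping; trim
 * back any delayed allocation beyond what was actually written.
 */
static int xfs_file_iomap_end(struct inode *inode, loff_t offset,
		loff_t length, ssize_t written, unsigned flags,
		struct iomap *iomap)
{
	/* placeholder: punch out unused delalloc blocks on short writes */
	return 0;
}

struct iomap_ops xfs_iomap_ops = {
	.iomap_begin	= xfs_file_iomap_begin,
	.iomap_end	= xfs_file_iomap_end,
};

Because iomap_zero_range() sees the extent type from ->iomap_begin, it can
skip holes and unwritten extents entirely, which is what later allows
xfs_zero_eof() to collapse to a single xfs_zero_range() call.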