From df0af1a57f72c74d53a9377c60ff20095afab97d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 29 Jan 2013 10:11:59 +0000 Subject: Btrfs: use the inode own lock to protect its delalloc_bytes We do not need a global lock to protect the delalloc_bytes of the inode; the inode's own lock is sufficient. This reduces lock contention, and ->delalloc_lock then protects only the delalloc inode list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/btrfs_inode.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 2a8c242bc4f5..c935a774a9f8 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -40,6 +40,7 @@ #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 #define BTRFS_INODE_NEEDS_FULL_SYNC 7 #define BTRFS_INODE_COPY_EVERYTHING 8 +#define BTRFS_INODE_IN_DELALLOC_LIST 9 /* in memory btrfs inode */ struct btrfs_inode { -- cgit v1.2.3 From 2e60a51e62185cce48758e596ae7cb2da673b58f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 8 Feb 2013 07:01:08 +0000 Subject: Btrfs: serialize unlocked dio reads with truncate Currently, we can do unlocked dio reads, but the following race is possible: dio_read_task truncate_task ->btrfs_setattr() ->btrfs_direct_IO ->__blockdev_direct_IO ->btrfs_get_block ->btrfs_truncate() #alloc truncated blocks #to other inode ->submit_io() #INFORMATION LEAK In order to avoid this problem, we must serialize unlocked dio reads with truncate. There are two approaches: - use extent lock to protect the extent that we truncate - use inode_dio_wait() to make sure the truncating task will wait for the read DIO. If we use the first approach, we will hit the endless truncation problem caused by the nonlocked read DIO once we implement the nonlocked write DIO. This is because we would still need to invoke inode_dio_wait() to avoid the race between write DIO and truncation. At that point, we would have to introduce btrfs_inode_{block,resume}_unlocked_dio() again.
That is, we would have to implement this patch again anyway, so I chose the second approach to fix the problem. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/btrfs_inode.h | 19 +++++++++++++++++++ fs/btrfs/inode.c | 23 +++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/btrfs_inode.h') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index c935a774a9f8..d9b97d4960e6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -41,6 +41,7 @@ #define BTRFS_INODE_NEEDS_FULL_SYNC 7 #define BTRFS_INODE_COPY_EVERYTHING 8 #define BTRFS_INODE_IN_DELALLOC_LIST 9 +#define BTRFS_INODE_READDIO_NEED_LOCK 10 /* in memory btrfs inode */ struct btrfs_inode { @@ -217,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) return 0; } +/* + * Disable DIO read nolock optimization, so new dio readers will be forced + * to grab i_mutex. It is used to avoid the endless truncate due to + * nonlocked dio read. + */ +static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) +{ + set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); + smp_mb(); +} + +static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) +{ + smp_mb__before_clear_bit(); + clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, + &BTRFS_I(inode)->runtime_flags); +} + #endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d11f38d8696c..c6ee8f1063ff 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3888,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) /* we don't support swapfiles, so vmtruncate shouldn't fail */ truncate_setsize(inode, newsize); + + /* Disable nonlocked read DIO to avoid the end less truncate */ + btrfs_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + btrfs_inode_resume_unlocked_dio(inode); + ret = btrfs_truncate(inode); if (ret && inode->i_nlink) btrfs_orphan_del(NULL, inode); @@ -6670,6 +6676,8 @@ static ssize_t
btrfs_direct_IO(int rw, struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; size_t count = 0; + int flags = 0; + bool wakeup = false; ssize_t ret; if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, @@ -6681,13 +6689,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) return ret; + } else { + atomic_inc(&inode->i_dio_count); + smp_mb__after_atomic_inc(); + if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, + &BTRFS_I(inode)->runtime_flags))) { + inode_dio_done(inode); + flags = DIO_LOCKING | DIO_SKIP_HOLES; + } else { + wakeup = true; + } } ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, - btrfs_submit_direct, 0); - + btrfs_submit_direct, flags); if (rw & WRITE) { if (ret < 0 && ret != -EIOCBQUEUED) btrfs_delalloc_release_space(inode, count); @@ -6700,6 +6717,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, } btrfs_delalloc_release_metadata(inode, 0); } + if (wakeup) + inode_dio_done(inode); return ret; } -- cgit v1.2.3