From 0e2f2b23672055e1301c11509d5b8eda7b718d8e Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Tue, 11 Sep 2012 08:28:18 +0800 Subject: writeback: correct comment for move_expired_inodes() The function scans @delaying_queue and stops at the first inode whose dirtied_when is after *work->older_than_this. So the expired ones being moved are those before *work->older_than_this. Correct the comment here. Reviewed-by: Jan Kara Signed-off-by: Wang Sheng-Hui --- fs/fs-writeback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f4..fd255c03d864 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -248,7 +248,7 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) } /* - * Move expired (dirtied after work->older_than_this) dirty inodes from + * Move expired (dirtied before work->older_than_this) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, -- cgit v1.2.3 From 00d4e7362ed01987183e9528295de3213031309c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 19 Sep 2012 22:42:36 -0400 Subject: ext4: fix potential deadlock in ext4_nonda_switch() In ext4_nonda_switch(), if the file system is getting full we used to call writeback_inodes_sb_if_idle(). The problem is that we can be holding i_mutex already, and this causes a potential deadlock when writeback_inodes_sb_if_idle() when it tries to take s_umount. (See lockdep output below). As it turns out we don't need need to hold s_umount; the fact that we are in the middle of the write(2) system call will keep the superblock pinned. Unfortunately writeback_inodes_sb() checks to make sure s_umount is taken, and the VFS uses a different mechanism for making sure the file system doesn't get unmounted out from under us. The simplest way of dealing with this is to just simply grab s_umount using a trylock, and skip kicking the writeback flusher thread in the very unlikely case that we can't take a read lock on s_umount without blocking. Also, we now check the cirteria for kicking the writeback thread before we decide to whether to fall back to non-delayed writeback, so if there are any outstanding delayed allocation writes, we try to get them resolved as soon as possible. [ INFO: possible circular locking dependency detected ] 3.6.0-rc1-00042-gce894ca #367 Not tainted ------------------------------------------------------- dd/8298 is trying to acquire lock: (&type->s_umount_key#18){++++..}, at: [] writeback_inodes_sb_if_idle+0x28/0x46 but task is already holding lock: (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 which lock already depends on the new lock. 2 locks held by dd/8298: #0: (sb_writers#2){.+.+.+}, at: [] generic_file_aio_write+0x56/0xd3 #1: (&sb->s_type->i_mutex_key#8){+.+...}, at: [] generic_file_aio_write+0x5f/0xd3 stack backtrace: Pid: 8298, comm: dd Not tainted 3.6.0-rc1-00042-gce894ca #367 Call Trace: [] ? console_unlock+0x345/0x372 [] print_circular_bug+0x190/0x19d [] __lock_acquire+0x86d/0xb6c [] ? mark_held_locks+0x5c/0x7b [] lock_acquire+0x66/0xb9 [] ? writeback_inodes_sb_if_idle+0x28/0x46 [] down_read+0x28/0x58 [] ? writeback_inodes_sb_if_idle+0x28/0x46 [] writeback_inodes_sb_if_idle+0x28/0x46 [] ext4_nonda_switch+0xe1/0xf4 [] ext4_da_write_begin+0x27/0x193 [] generic_file_buffered_write+0xc8/0x1bb [] __generic_file_aio_write+0x1dd/0x205 [] generic_file_aio_write+0x78/0xd3 [] ext4_file_write+0x480/0x4a6 [] ? __lock_acquire+0x41e/0xb6c [] ? sched_clock_cpu+0x11a/0x13e [] ? trace_hardirqs_off+0xb/0xd [] ? local_clock+0x37/0x4e [] do_sync_write+0x67/0x9d [] ? wait_on_retry_sync_kiocb+0x44/0x44 [] vfs_write+0x7b/0xe6 [] sys_write+0x3b/0x64 [] syscall_call+0x7/0xb Signed-off-by: "Theodore Ts'o" Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 17 ++++++++++------- fs/fs-writeback.c | 1 + 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ca76b5ed6c9e..0a31197590d7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2462,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb) free_blocks = EXT4_C2B(sbi, percpu_counter_read_positive(&sbi->s_freeclusters_counter)); dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); + /* + * Start pushing delalloc when 1/2 of free blocks are dirty. + */ + if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && + !writeback_in_progress(sb->s_bdi) && + down_read_trylock(&sb->s_umount)) { + writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); + up_read(&sb->s_umount); + } + if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { /* @@ -2470,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb) */ return 1; } - /* - * Even if we don't switch but are nearing capacity, - * start pushing delalloc when 1/2 of free blocks are dirty. - */ - if (free_blocks < 2 * dirty_blocks) - writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE); - return 0; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index be3efc4f64f4..5602d73d4ec7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi) { return test_bit(BDI_writeback_running, &bdi->state); } +EXPORT_SYMBOL(writeback_in_progress); static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { -- cgit v1.2.3 From cd8ed2a45a401cb692d769e92de7d73aa42fabce Mon Sep 17 00:00:00 2001 From: Yan Hong Date: Mon, 8 Oct 2012 16:33:45 -0700 Subject: fs/fs-writeback.c: remove unneccesary parameter of __writeback_single_inode() The parameter 'wb' is never used in this function. Signed-off-by: Yan Hong Acked-by: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/fs-writeback.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8e1d7b9e4a33..401b6c6248ae 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -439,8 +439,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * setting I_SYNC flag and calling inode_sync_complete() to clear it. */ static int -__writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, - struct writeback_control *wbc) +__writeback_single_inode(struct inode *inode, struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; long nr_to_write = wbc->nr_to_write; @@ -527,7 +526,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); - ret = __writeback_single_inode(inode, wb, wbc); + ret = __writeback_single_inode(inode, wbc); spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); @@ -670,7 +669,7 @@ static long writeback_sb_inodes(struct super_block *sb, * We use I_SYNC to pin the inode in memory. While it is set * evict_inode() will wait so the inode cannot be freed. */ - __writeback_single_inode(inode, wb, &wbc); + __writeback_single_inode(inode, &wbc); work->nr_pages -= write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write; -- cgit v1.2.3