From 9924a92a8c217576bd2a2b1bbbb854462f1a00ae Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 8 Feb 2013 21:59:22 -0500 Subject: ext4: pass context information to jbd2__journal_start() So we can better understand what bits of ext4 are responsible for long-running jbd2 handles, use jbd2__journal_start() so we can pass context information for logging purposes. The recommended way for finding the longer-running handles is: T=/sys/kernel/debug/tracing EVENT=$T/events/jbd2/jbd2_handle_stats echo "interval > 5" > $EVENT/filter echo 1 > $EVENT/enable ./run-my-fs-benchmark cat $T/trace > /tmp/problem-handles This will list handles that were active for longer than 20ms. Having longer-running handles is bad, because a commit started at the wrong time could stall for those 20+ milliseconds, which could delay an fsync() or an O_SYNC operation. Here is an example line from the trace file describing a handle which lived on for 311 jiffies, or over 1.2 seconds: postmark-2917 [000] .... 196.435786: jbd2_handle_stats: dev 254,32 tid 570 type 2 line_no 2541 interval 311 sync 0 requested_blocks 1 dirtied_blocks 0 Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 3f32c8012447..10bd6fecc9ff 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1137,7 +1137,7 @@ int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) goto out; - handle = ext4_journal_start_sb(sb, 1); + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; -- cgit v1.2.3 From 1139575a927010390c6b38e4215a6d741b056074 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 9 Feb 2013 16:27:09 -0500 Subject: ext4: start handle at the last possible moment when creating inodes In ext4_{create,mknod,mkdir,symlink}(), don't start the journal handle until the inode has been succesfully allocated. In order to do this, we need to start the handle in the ext4_new_inode(). So create a new variant of this function, ext4_new_inode_start_handle(), so the handle can be created at the last possible minute, before we need to modify the inode allocation bitmap block. Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 17 ++++++++-- fs/ext4/ialloc.c | 15 +++++++-- fs/ext4/namei.c | 94 ++++++++++++++++++++++++++------------------------------ 3 files changed, 71 insertions(+), 55 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 61ecf059f70c..fc1c0375c9f2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2004,9 +2004,20 @@ extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode *ext4_new_inode(handle_t *, struct inode *, umode_t, - const struct qstr *qstr, __u32 goal, - uid_t *owner); +extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t, + const struct qstr *qstr, __u32 goal, + uid_t *owner, int handle_type, + unsigned int line_no, int nblocks); + +#define ext4_new_inode(handle, dir, mode, qstr, goal, owner) \ + __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \ + 0, 0, 0) +#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \ + type, nblocks) \ + __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \ + (type), __LINE__, (nblocks)) + + extern void ext4_free_inode(handle_t *, struct inode *); extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 10bd6fecc9ff..91d8fe3aced3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -634,8 +634,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, umode_t mode, - const struct qstr *qstr, __u32 goal, uid_t *owner) +struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir, + umode_t mode, const struct qstr *qstr, + __u32 goal, uid_t *owner, int handle_type, + unsigned int line_no, int nblocks) { struct super_block *sb; struct buffer_head *inode_bitmap_bh = NULL; @@ -725,6 +727,15 @@ repeat_in_this_group: "inode=%lu", ino + 1); continue; } + if (!handle) { + BUG_ON(nblocks <= 0); + handle = __ext4_journal_start_sb(dir->i_sb, line_no, + handle_type, nblocks); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto fail; + } + } BUFFER_TRACE(inode_bitmap_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, inode_bitmap_bh); if (err) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5f3d2b569c63..3e1529c39808 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2257,30 +2257,28 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); retry: - handle = ext4_journal_start(dir, EXT4_HT_DIR, - (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb))); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, + NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); } - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2291,32 +2289,30 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; if (!new_valid_dev(rdev)) return -EINVAL; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); retry: - handle = ext4_journal_start(dir, EXT4_HT_DIR, - (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb))); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, + NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &ext4_special_inode_operations; err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); } - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2408,26 +2404,21 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); retry: - handle = ext4_journal_start(dir, EXT4_HT_DIR, - (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb))); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, S_IFDIR | mode, - &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode, + &dentry->d_name, + 0, NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -2455,8 +2446,12 @@ out_clear_inode: goto out_clear_inode; unlock_new_inode(inode); d_instantiate(dentry, inode); + if (IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + out_stop: - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2883,15 +2878,10 @@ static int ext4_symlink(struct inode *dir, EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); } retry: - handle = ext4_journal_start(dir, EXT4_HT_DIR, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, - &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, + &dentry->d_name, 0, NULL, + EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -2944,8 +2934,12 @@ retry: } EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + out_stop: - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; -- cgit v1.2.3 From 8de5c325b4ee7d5b23b95edd28b81c9a2a9f427f Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 14 Feb 2013 15:11:41 -0500 Subject: ext4: use KERN_WARNING for warning messages Some messages printed related to a WARN_ON(1) were printed using KERN_NOTICE. Use KERN_WARNING or ext4_warning() instead so that context related to the WARN_ON() is printed at the same printk warning level (and log files, etc.) Signed-off-by: "Theodore Ts'o" --- fs/ext4/ialloc.c | 12 ++++++------ fs/ext4/inode.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/ext4/ialloc.c') diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 91d8fe3aced3..32fd2b9075dd 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1028,17 +1028,17 @@ iget_failed: inode = NULL; bad_orphan: ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino); - printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", + printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, ext4_test_bit(bit, bitmap_bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); + printk(KERN_WARNING "inode=%p\n", inode); if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + printk(KERN_WARNING "is_bad_inode(inode)=%d\n", is_bad_inode(inode)); - printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n", NEXT_ORPHAN(inode)); - printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink); + printk(KERN_WARNING "max_ino=%lu\n", max_ino); + printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink); /* Avoid freeing blocks if we got a bad deleted inode */ if (inode->i_nlink == 0) inode->i_blocks = 0; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2fa18bb0bf3c..b85d5dae726b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -343,7 +343,7 @@ void ext4_da_update_reserve_space(struct inode *inode, spin_lock(&ei->i_block_reservation_lock); trace_ext4_da_update_reserve_space(inode, used, quota_claim); if (unlikely(used > ei->i_reserved_data_blocks)) { - ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " + ext4_warning(inode->i_sb, "%s: ino %lu, used %d " "with only %d reserved data blocks", __func__, inode->i_ino, used, ei->i_reserved_data_blocks); @@ -352,7 +352,7 @@ void ext4_da_update_reserve_space(struct inode *inode, } if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) { - ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d " + ext4_warning(inode->i_sb, "%s: ino %lu, allocated %d " "with only %d reserved metadata blocks\n", __func__, inode->i_ino, ei->i_allocated_meta_blocks, ei->i_reserved_meta_blocks); @@ -1263,7 +1263,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free) * function is called from invalidate page, it's * harmless to return without any action. */ - ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " + ext4_warning(inode->i_sb, "ext4_da_release_space: " "ino %lu, to_free %d with only %d reserved " "data blocks", inode->i_ino, to_free, ei->i_reserved_data_blocks); -- cgit v1.2.3