From 1f6d64829db78a7e1d63e15c9f48f0a5d2b5a679 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 6 Jun 2014 15:59:59 +1000 Subject: xfs: block allocation work needs to be kswapd aware Upon memory pressure, kswapd calls xfs_vm_writepage() from shrink_page_list(). This can result in delayed allocation occurring and that gets deferred to the the allocation workqueue. The allocation then runs outside kswapd context, which means if it needs memory (and it does to demand page metadata from disk) it can block in shrink_inactive_list() waiting for IO congestion. These blocking waits are normally avoiding in kswapd context, so under memory pressure writeback from kswapd can be arbitrarily delayed by memory reclaim. To avoid this, pass the kswapd context to the allocation being done by the workqueue, so that memory reclaim understands correctly that the work is being done for kswapd and therefore it is not blocked and does not delay memory reclaim. To avoid issues with int->char conversion of flag fields (as noticed in v1 of this patch) convert the flag fields in the struct xfs_bmalloca to bool types. pahole indicates these variables are still single byte variables, so no extra space is consumed by this change. cc: Reported-by: Tetsuo Handa Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_bmap_util.c | 16 +++++++++++++--- fs/xfs/xfs_bmap_util.h | 13 +++++++------ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 296160b8e78c..47a9daa7b4e6 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -258,14 +258,23 @@ xfs_bmapi_allocate_worker( struct xfs_bmalloca *args = container_of(work, struct xfs_bmalloca, work); unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; - /* we are in a transaction context here */ - current_set_flags_nested(&pflags, PF_FSTRANS); + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); args->result = __xfs_bmapi_allocate(args); complete(args->done); - current_restore_flags_nested(&pflags, PF_FSTRANS); + current_restore_flags_nested(&pflags, new_pflags); } /* @@ -284,6 +293,7 @@ xfs_bmapi_allocate( args->done = &done; + args->kswapd = current_is_kswapd(); INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); queue_work(xfs_alloc_wq, &args->work); wait_for_completion(&done); diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 935ed2b24edf..075f72232a64 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,13 @@ struct xfs_bmalloca { xfs_extlen_t total; /* total blocks needed for xaction */ xfs_extlen_t minlen; /* minimum allocation size (blocks) */ xfs_extlen_t minleft; /* amount must be left after alloc */ - char eof; /* set if allocating past last extent */ - char wasdel; /* replacing a delayed allocation */ - char userdata;/* set if is user data */ - char aeof; /* allocated space at eof */ - char conv; /* overwriting unwritten extents */ - char stack_switch; + bool eof; /* set if allocating past last extent */ + bool wasdel; /* replacing a delayed allocation */ + bool userdata;/* set if is user data */ + bool aeof; /* allocated space at eof */ + bool conv; /* overwriting unwritten extents */ + bool stack_switch; + bool kswapd; /* allocation in kswapd context */ int flags; struct completion *done; struct work_struct work; -- cgit v1.2.3 From 556b8883cfac3d3203557e161ea8005f8b5479b2 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 6 Jun 2014 16:00:43 +1000 Subject: xfs: xfs_readsb needs to check for magic numbers Commit daba542 ("xfs: skip verification on initial "guess" superblock read") dropped the use of a verifier for the initial superblock read so we can probe the sector size of the filesystem stored in the superblock. It, however, now fails to validate that what was read initially is actually an XFS superblock and hence will fail the sector size check and return ENOSYS. This causes probe-based mounts to fail because it expects XFS to return EINVAL when it doesn't recognise the superblock format. cc: Reported-by: Plamen Petrov Tested-by: Plamen Petrov Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 944f3d9456a8..a9e29ea37620 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -323,8 +323,19 @@ reread: /* * Initialize the mount structure from the superblock. */ - xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); - xfs_sb_quota_from_disk(&mp->m_sb); + xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); + xfs_sb_quota_from_disk(sbp); + + /* + * If we haven't validated the superblock, do so now before we try + * to check the sector size and reread the superblock appropriately. + */ + if (sbp->sb_magicnum != XFS_SB_MAGIC) { + if (loud) + xfs_warn(mp, "Invalid superblock magic number"); + error = EINVAL; + goto release_buf; + } /* * We must be able to do sector-sized and sector-aligned IO. @@ -337,11 +348,11 @@ reread: goto release_buf; } - /* - * Re-read the superblock so the buffer is correctly sized, - * and properly verified. - */ if (buf_ops == NULL) { + /* + * Re-read the superblock so the buffer is correctly sized, + * and properly verified. + */ xfs_buf_relse(bp); sector_size = sbp->sb_sectsize; buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops; -- cgit v1.2.3 From 36de95567f910f5544060f50346d8677ae13ad22 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 6 Jun 2014 16:02:12 +1000 Subject: xfs: kill xfs_buf_geterror() Most of the callers are just calling ASSERT(!xfs_buf_geterror()) which means they are checking for bp->b_error == 0. If bp is null in this case, we will assert fail, and hence it's no different in result to oopsing because of a null bp. In some cases, errors have already been checked for or the function returning the buffer can't return a buffer with an error, so it's just a redundant assert. Either way, the assert can either be removed. The other two non-assert callers can just test for a buffer and error properly. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_alloc.c | 1 - fs/xfs/xfs_btree.c | 12 ++---------- fs/xfs/xfs_buf.h | 5 ----- fs/xfs/xfs_buf_item.c | 2 +- fs/xfs/xfs_dquot.c | 6 +++--- fs/xfs/xfs_ialloc.c | 1 - fs/xfs/xfs_log.c | 2 +- fs/xfs/xfs_rtbitmap.c | 1 - 8 files changed, 7 insertions(+), 23 deletions(-) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index c1cf6a336a72..077c3417a54e 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -541,7 +541,6 @@ xfs_alloc_read_agfl( XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index e80d59fdf89a..22707adb99ab 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -552,14 +552,11 @@ xfs_btree_get_bufl( xfs_fsblock_t fsbno, /* file system block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -574,15 +571,12 @@ xfs_btree_get_bufs( xfs_agblock_t agbno, /* allocation group block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(!xfs_buf_geterror(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -722,7 +716,6 @@ xfs_btree_read_bufl( mp->m_bsize, lock, &bp, ops); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); if (bp) xfs_buf_set_ref(bp, refval); *bpp = bp; @@ -1178,7 +1171,6 @@ xfs_btree_read_buf_block( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); return 0; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index b8a3abf6cf47..a999a3941c81 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -298,11 +298,6 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, extern int xfs_bioerror_relse(struct xfs_buf *); -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ - return bp ? bp->b_error : ENOMEM; -} - /* Buffer Utility Routines */ extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 8752821443be..941f6e984ac4 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1053,7 +1053,7 @@ xfs_buf_iodone_callbacks( static ulong lasttime; static xfs_buftarg_t *lasttarg; - if (likely(!xfs_buf_geterror(bp))) + if (likely(!bp->b_error)) goto do_callbacks; /* diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 868b19f096bf..8867d0232fac 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -353,10 +353,10 @@ xfs_qm_dqalloc( dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); - - error = xfs_buf_geterror(bp); - if (error) + if (!bp) { + error = ENOMEM; goto error1; + } bp->b_ops = &xfs_dquot_buf_ops; /* diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 8f711db61a0c..c254b1cac4f8 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -1640,7 +1640,6 @@ xfs_read_agi( if (error) return error; - ASSERT(!xfs_buf_geterror(*bpp)); xfs_buf_set_ref(*bpp, XFS_AGI_REF); return 0; } diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a5f8bd9899d3..ae871df3a5a8 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1165,7 +1165,7 @@ xlog_iodone(xfs_buf_t *bp) /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, + if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_stale(bp); diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c index b1f2fe8af4a8..f4dd697cac08 100644 --- a/fs/xfs/xfs_rtbitmap.c +++ b/fs/xfs/xfs_rtbitmap.c @@ -74,7 +74,6 @@ xfs_rtbuf_get( mp->m_bsize, 0, &bp, NULL); if (error) return error; - ASSERT(!xfs_buf_geterror(bp)); *bpp = bp; return 0; } -- cgit v1.2.3 From 72208ee060635dfab2b3bd447a95e0f9c419e954 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Jun 2014 16:04:42 +1000 Subject: xfs: small cleanup in xfs_lowbit64() There are two checkpatch.pl complaints here because of the bad indenting and because of the assignment inside the condition. Signed-off-by: Dan Carpenter Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner --- fs/xfs/xfs_bit.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index f1e3c907044d..e1649c0d3e02 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -66,8 +66,11 @@ static inline int xfs_lowbit64(__uint64_t v) n = ffs(w); } else { /* upper bits */ w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; + if (w) { + n = ffs(w); + if (n) + n += 32; + } } return n - 1; } -- cgit v1.2.3 From 448011e2ab1c44f7990a62649580bde0da5242b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 6 Jun 2014 16:05:15 +1000 Subject: xfs: tone down writepage/releasepage WARN_ONs I recently ran into the issue fixed by "xfs: kill buffers over failed write ranges properly" which spams the log with lots of backtraces. Make debugging any issues like that easier by using WARN_ON_ONCE in the writeback code. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_aops.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0479c32c5eb1..ce0c5ca0a8a3 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -975,7 +975,7 @@ xfs_vm_writepage( * Given that we do not allow direct reclaim to call us, we should * never be called while in a filesystem transaction. */ - if (WARN_ON(current->flags & PF_FSTRANS)) + if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) goto redirty; /* Is this page beyond the end of the file? */ @@ -1188,9 +1188,9 @@ xfs_vm_releasepage( xfs_count_page_state(page, &delalloc, &unwritten); - if (WARN_ON(delalloc)) + if (WARN_ON_ONCE(delalloc)) return 0; - if (WARN_ON(unwritten)) + if (WARN_ON_ONCE(unwritten)) return 0; return try_to_free_buffers(page); -- cgit v1.2.3 From 30265117ee1e23fa91920f337a3ea91207f700dc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 6 Jun 2014 16:06:37 +1000 Subject: xfs: Fix rounding in xfs_alloc_fix_len() Rounding in xfs_alloc_fix_len() is wrong. As the comment states, the result should be a number of a form (k*prod+mod) however due to sign mistake the result is different. As a result allocations on raid arrays could be misaligned in some cases. This also seems to fix occasional assertion failure: XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0) in xfs_alloc_ag_vextent_size(). Also add an assertion that the result of xfs_alloc_fix_len() is of expected form. Signed-off-by: Jan Kara Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_alloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 077c3417a54e..d43813267a80 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -257,16 +257,14 @@ xfs_alloc_fix_len( k = rlen % args->prod; if (k == args->mod) return; - if (k > args->mod) { - if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) - return; - } else { - if ((int)(rlen = rlen - args->prod - (args->mod - k)) < - (int)args->minlen) - return; - } - ASSERT(rlen >= args->minlen); - ASSERT(rlen <= args->maxlen); + if (k > args->mod) + rlen = rlen - (k - args->mod); + else + rlen = rlen - args->prod + (args->mod - k); + if ((int)rlen < (int)args->minlen) + return; + ASSERT(rlen >= args->minlen && rlen <= args->maxlen); + ASSERT(rlen % args->prod == args->mod); args->len = rlen; } -- cgit v1.2.3