diff options
Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/xfs_icache.c      | 309
-rw-r--r--  fs/xfs/xfs_icache.h      |   8
-rw-r--r--  fs/xfs/xfs_inode.c       |  70
-rw-r--r--  fs/xfs/xfs_inode.h       |  15
-rw-r--r--  fs/xfs/xfs_iomap.c       |   8
-rw-r--r--  fs/xfs/xfs_log_recover.c |   7
-rw-r--r--  fs/xfs/xfs_mount.c       |  16
-rw-r--r--  fs/xfs/xfs_mount.h       |   5
-rw-r--r--  fs/xfs/xfs_qm_syscalls.c |   6
-rw-r--r--  fs/xfs/xfs_super.c       |  59
-rw-r--r--  fs/xfs/xfs_trace.h       |  11
11 files changed, 486 insertions, 28 deletions
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index abfd56775c61..d1a9e1571e38 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -228,6 +228,19 @@ xfs_reclaim_work_queue( rcu_read_unlock(); } +/* Queue a new inode inactivation pass if there are reclaimable inodes. */ +static void +xfs_inactive_work_queue( + struct xfs_mount *mp) +{ + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) + queue_delayed_work(mp->m_inactive_workqueue, + &mp->m_inactive_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); + rcu_read_unlock(); +} + /* * This is a fast pass over the inode cache to try to get reclaim moving on as * many inodes as possible in a short period of time. It kicks itself every few @@ -246,10 +259,86 @@ xfs_reclaim_worker( xfs_reclaim_work_queue(mp); } +/* + * Set the per-ag "inodes awaiting inactivation" tag. This isn't a real tag; + * we overload the RECLAIM tag to cover both inactive and reclaimable inodes. + * We maintain separate perag counters for both types, and move counts as inodes + * wander the state machine. 
+ * + * When an inode hits zero refcount, we: + * - Set the RECLAIMABLE inode flag + * - Set the RECLAIM tag in the per-AG inode tree + * - Set the RECLAIM tag in the per-fs AG tree + * + * If the inode needs inactivation, we: + * - Set the NEED_INACTIVE inode flag + * - Increment the per-AG inactive count + * - Schedule background inode inactivation + * + * If the inode did not need inactivation, we: + * - Increment the per-AG reclaim count + * - Schedule background inode reclamation + * + * When it is time for background inode inactivation, we: + * - Set the INACTIVATING inode flag + * - Make all the on-disk updates + * - Clear both INACTIVATING and NEED_INACTIVE inode flags + * - Decrement the per-AG inactive count + * - Increment the per-AG reclaim count + * - Schedule background inode reclamation + * + * When it is time for background inode reclamation, we: + * - Set the IRECLAIM inode flag + * - Detach all the resources and remove the inode from the per-AG inode tree + * - Clear both IRECLAIM and RECLAIMABLE inode flags + * - Decrement the per-AG reclaim count + * - Clear the RECLAIM tag from the per-AG inode tree + * - Clear the RECLAIM tag from the per-fs AG tree if there are no more + * inodes waiting for reclamation or inactivation + */ static void -xfs_perag_set_reclaim_tag( +xfs_perag_set_inactive_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + struct xfs_mount *mp = pag->pag_mount; + + lockdep_assert_held(&pag->pag_ici_lock); + if (pag->pag_ici_inactive++ == 0) { + /* propagate the reclaim tag up into the perag radix tree */ + spin_lock(&mp->m_perag_lock); + radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno, + XFS_ICI_RECLAIM_TAG); + spin_unlock(&mp->m_perag_lock); + } + + /* + * Schedule periodic background inode inactivation. 
Inactivation can + * take a while, so we allow the deferral of an already-scheduled + * inactivation on the grounds that xfs_fs_destroy_inode has a better + * idea of when it ought to force inactivation, and in the mean time + * we prefer batching. + */ + xfs_inactive_work_queue(mp); + + trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_); +} + +/* Move an inode from inactive to reclaim. */ +static void +xfs_perag_clear_inactive_tag( struct xfs_perag *pag) { + lockdep_assert_held(&pag->pag_ici_lock); + pag->pag_ici_inactive--; + pag->pag_ici_reclaimable++; +} + +static void +xfs_perag_set_reclaim_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) +{ struct xfs_mount *mp = pag->pag_mount; lockdep_assert_held(&pag->pag_ici_lock); @@ -275,7 +364,7 @@ xfs_perag_clear_reclaim_tag( struct xfs_mount *mp = pag->pag_mount; lockdep_assert_held(&pag->pag_ici_lock); - if (--pag->pag_ici_reclaimable) + if (--pag->pag_ici_reclaimable || pag->pag_ici_inactive > 0) return; /* clear the reclaim tag from the perag radix tree */ @@ -294,10 +383,12 @@ xfs_perag_clear_reclaim_tag( */ void xfs_inode_set_reclaim_tag( - struct xfs_inode *ip) + struct xfs_inode *ip, + bool need_inactive) { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; + unsigned long iflags = need_inactive ? 
XFS_NEED_INACTIVE : 0; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); spin_lock(&pag->pag_ici_lock); @@ -305,8 +396,11 @@ xfs_inode_set_reclaim_tag( radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); - xfs_perag_set_reclaim_tag(pag); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + if (need_inactive) + xfs_perag_set_inactive_tag(pag, ip); + else + xfs_perag_set_reclaim_tag(pag, ip); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE | iflags); spin_unlock(&ip->i_flags_lock); spin_unlock(&pag->pag_ici_lock); @@ -385,6 +479,13 @@ xfs_iget_check_free_state( struct xfs_inode *ip, int flags) { + /* + * Unlinked inodes awaiting inactivation must not be reused until we + * have a chance to clear the on-disk metadata. + */ + if (VFS_I(ip)->i_nlink == 0 && (ip->i_flags & XFS_NEED_INACTIVE)) + return -ENOENT; + if (flags & XFS_IGET_CREATE) { /* should be a free inode */ if (VFS_I(ip)->i_mode != 0) { @@ -444,14 +545,14 @@ xfs_iget_cache_hit( /* * If we are racing with another cache hit that is currently * instantiating this inode or currently recycling it out of - * reclaimabe state, wait for the initialisation to complete + * reclaimable state, wait for the initialisation to complete * before continuing. * * XXX(hch): eventually we should do something equivalent to * wait_on_inode to wait for these flags to be cleared * instead of polling for it. */ - if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { + if (ip->i_flags & (XFS_INEW | XFS_IRECLAIM | XFS_INACTIVATING)) { trace_xfs_iget_skip(ip); XFS_STATS_INC(mp, xs_ig_frecycle); error = -EAGAIN; @@ -471,6 +572,8 @@ xfs_iget_cache_hit( * Need to carefully get it back into useable state. */ if (ip->i_flags & XFS_IRECLAIMABLE) { + bool needed_inactive; + trace_xfs_iget_reclaim(ip); if (flags & XFS_IGET_INCORE) { @@ -479,16 +582,33 @@ xfs_iget_cache_hit( } /* + * If we played inactivation accounting tricks with this inode + * we have to undo them prior to resurrecting this inode. 
+ */ + needed_inactive = (ip->i_flags & XFS_NEED_INACTIVE); + + /* * We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode * from stomping over us while we recycle the inode. We can't * clear the radix tree reclaimable tag yet as it requires * pag_ici_lock to be held exclusive. + * + * Clear NEED_INACTIVE so that the inactive worker won't + * touch this inode now that we're trying to resurrect it. */ ip->i_flags |= XFS_IRECLAIM; + ip->i_flags &= ~XFS_NEED_INACTIVE; spin_unlock(&ip->i_flags_lock); rcu_read_unlock(); + if (needed_inactive) { + xfs_inode_inactivation_cleanup(ip); + spin_lock(&pag->pag_ici_lock); + xfs_perag_clear_inactive_tag(pag); + spin_unlock(&pag->pag_ici_lock); + } + error = xfs_reinit_inode(mp, inode); if (error) { bool wake; @@ -1082,6 +1202,7 @@ xfs_inode_ag_iterator_tag( STATIC bool xfs_reclaim_inode_grab( struct xfs_inode *ip, + struct xfs_eofblocks *eofb, int flags) { ASSERT(rcu_read_lock_held()); @@ -1111,7 +1232,8 @@ xfs_reclaim_inode_grab( */ spin_lock(&ip->i_flags_lock); if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || - __xfs_iflags_test(ip, XFS_IRECLAIM)) { + __xfs_iflags_test(ip, XFS_IRECLAIM) || + __xfs_iflags_test(ip, XFS_NEED_INACTIVE)) { /* not a reclaim candidate. 
*/ spin_unlock(&ip->i_flags_lock); return false; @@ -1170,6 +1292,8 @@ xfs_reclaim_inode( xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */ int error; + trace_xfs_inode_reclaiming(ip); + restart: error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); @@ -1302,8 +1426,10 @@ out: STATIC int xfs_reclaim_inodes_pag( struct xfs_perag *pag, + struct xfs_eofblocks *eofb, int sync_flags, bool (*grab_fn)(struct xfs_inode *ip, + struct xfs_eofblocks *eofb, int sync_flags), int (*execute_fn)(struct xfs_inode *ip, struct xfs_perag *pag, @@ -1338,7 +1464,7 @@ xfs_reclaim_inodes_pag( for (i = 0; i < nr_found; i++) { struct xfs_inode *ip = batch[i]; - if (*done || !grab_fn(ip, sync_flags)) + if (*done || !grab_fn(ip, eofb, sync_flags)) batch[i] = NULL; /* @@ -1418,7 +1544,7 @@ restart: } else mutex_lock(&pag->pag_ici_reclaim_lock); - error = xfs_reclaim_inodes_pag(pag, flags, + error = xfs_reclaim_inodes_pag(pag, NULL, flags, xfs_reclaim_inode_grab, xfs_reclaim_inode, nr_to_scan, &done); if (error && last_error != -EFSCORRUPTED) @@ -1497,6 +1623,161 @@ xfs_reclaim_inodes_count( return reclaimable; } +/* + * Grab the inode for inactivation exclusively. + * Return true if we grabbed it. + */ +STATIC bool +xfs_inactive_inode_grab( + struct xfs_inode *ip, + struct xfs_eofblocks *eofb, + int sync_flags) +{ + ASSERT(rcu_read_lock_held()); + + /* quick check for stale RCU freed inode */ + if (!ip->i_ino) + return false; + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. + * + * Due to RCU lookup, we may find inodes that have been freed and only + * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that + * aren't candidates for reclaim at all, so we must check the + * XFS_IRECLAIMABLE is set first before proceeding to reclaim. + * Obviously if XFS_NEED_INACTIVE isn't set then we ignore this inode. 
+ */ + spin_lock(&ip->i_flags_lock); + if (!(ip->i_flags & XFS_IRECLAIMABLE) || + !(ip->i_flags & XFS_NEED_INACTIVE) || + (ip->i_flags & XFS_INACTIVATING)) { + /* not a inactivation candidate. */ + spin_unlock(&ip->i_flags_lock); + return false; + } + + if (!xfs_inode_matches_eofb(ip, eofb)) { + spin_unlock(&ip->i_flags_lock); + return false; + } + + ip->i_flags |= XFS_INACTIVATING; + spin_unlock(&ip->i_flags_lock); + return true; +} + +/* Inactivate this inode. */ +STATIC int +xfs_inactive_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int sync_flags) +{ + ASSERT(ip->i_mount->m_super->s_writers.frozen < SB_FREEZE_FS); + + trace_xfs_inode_inactivating(ip); + + /* Update metadata prior to freeing inode. */ + xfs_inode_inactivation_cleanup(ip); + xfs_inactive(ip); + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + spin_lock(&pag->pag_ici_lock); + xfs_perag_clear_inactive_tag(pag); + spin_unlock(&pag->pag_ici_lock); + + /* Kick the inactive inode to reclaim now that we've made updates. */ + spin_lock(&ip->i_flags_lock); + ip->i_flags &= ~(XFS_NEED_INACTIVE | XFS_INACTIVATING); + ASSERT(ip->i_flags & XFS_IRECLAIMABLE); + xfs_reclaim_work_queue(ip->i_mount); + spin_unlock(&ip->i_flags_lock); + return 0; +} + +/* + * Walk the AGs and reclaim the inodes in them. Even if the filesystem is + * corrupted, we still need to clear the INACTIVE iflag so that we can move + * on to reclaiming the inode. + */ +int +xfs_inactive_inodes( + struct xfs_mount *mp, + struct xfs_eofblocks *eofb) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno; + int last_error = 0; + int error; + + /* + * We want to skip inode inactivation while the filesystem is frozen + * because we don't want the inactivation thread to block while taking + * sb_intwrite. Therefore, we try to take sb_write for the duration + * of the inactive scan -- a freeze attempt will block until we're + * done here, and if the fs is past stage 1 freeze we'll bounce out + * until things unfreeze. 
If the fs goes down while frozen we'll + * still have log recovery to clean up after us. + */ + if (!sb_start_write_trylock(mp->m_super)) + return -EAGAIN; + + agno = 0; + while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) { + int nr_to_scan = INT_MAX; + bool done = false; + + agno = pag->pag_agno + 1; + error = xfs_reclaim_inodes_pag(pag, eofb, 0, + xfs_inactive_inode_grab, xfs_inactive_inode, + &nr_to_scan, &done); + if (error && last_error != -EFSCORRUPTED) + last_error = error; + xfs_perag_put(pag); + } + + sb_end_write(mp->m_super); + return last_error; +} + +/* Try to get inode inactivation moving. */ +void +xfs_inactive_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_inactive_work); + int error; + + error = xfs_inactive_inodes(mp, NULL); + if (error && error != -EAGAIN) + xfs_err(mp, "inode inactivation failed, error %d", error); + xfs_inactive_work_queue(mp); +} + +/* Flush all inode inactivation work that might be queued. */ +void +xfs_inactive_force( + struct xfs_mount *mp) +{ + queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0); + flush_delayed_work(&mp->m_inactive_work); +} + +/* + * Flush all inode inactivation work that might be queued and make sure the + * delayed work item is not queued. + */ +void +xfs_inactive_deactivate( + struct xfs_mount *mp) +{ + cancel_delayed_work_sync(&mp->m_inactive_work); + flush_workqueue(mp->m_inactive_workqueue); + xfs_inactive_inodes(mp, NULL); +} + STATIC int xfs_inode_free_eofblocks( struct xfs_inode *ip, @@ -1611,6 +1892,14 @@ __xfs_inode_free_quota_eofblocks( return scan; } +/* Flush any inode with the same quota as this inode. 
*/ +int +xfs_inactive_free_quota( + struct xfs_inode *ip) +{ + return __xfs_inode_free_quota_eofblocks(ip, xfs_inactive_inodes); +} + int xfs_inode_free_quota_eofblocks( struct xfs_inode *ip) diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 26c0626f1f75..fd4073debd6e 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -55,7 +55,7 @@ int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); int xfs_reclaim_inodes_count(struct xfs_mount *mp); long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); -void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip, bool need_inactive); void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip); void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip); @@ -122,4 +122,10 @@ int xfs_icache_inode_is_allocated(struct xfs_mount *mp, struct xfs_trans *tp, void xfs_icache_disable_reclaim(struct xfs_mount *mp); void xfs_icache_enable_reclaim(struct xfs_mount *mp); +void xfs_inactive_worker(struct work_struct *work); +int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb); +void xfs_inactive_force(struct xfs_mount *mp); +void xfs_inactive_deactivate(struct xfs_mount *mp); +int xfs_inactive_free_quota(struct xfs_inode *ip); + #endif diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index ee4ac4530ec6..757e1fde0c50 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1842,6 +1842,62 @@ xfs_inode_iadjust( xfs_qm_iadjust(ip, direction, inodes, dblocks, rblocks); } +/* Clean up inode inactivation. */ +void +xfs_inode_inactivation_cleanup( + struct xfs_inode *ip) +{ + int ret; + + /* + * Undo the pending-inactivation counter updates since we're bringing + * this inode back to life. + */ + ret = xfs_qm_dqattach(ip); + if (ret) + xfs_err(ip->i_mount, "error %d reactivating inode quota", ret); + + xfs_inode_iadjust(ip, -1); +} + +/* Prepare inode for inactivation. 
*/ +void +xfs_inode_inactivation_prep( + struct xfs_inode *ip) +{ + int ret; + + /* + * If this inode is unlinked (and now unreferenced) we need to dispose + * of it in the on disk metadata. + * + * Bump generation so that the inode can't be opened by handle now that + * the last external references has dropped. Bulkstat won't return + * inodes with zero nlink so nobody will ever find this inode again. + * Then add this inode & blocks to the counts of things that will be + * freed during the next inactivation run. + */ + if (VFS_I(ip)->i_nlink == 0) + VFS_I(ip)->i_generation++; + + /* + * Increase the pending-inactivation counters so that the fs looks like + * it's free. + */ + ret = xfs_qm_dqattach(ip); + if (ret) + xfs_err(ip->i_mount, "error %d inactivating inode quota", ret); + + xfs_inode_iadjust(ip, 1); + + /* + * Detach dquots just in case someone tries a quotaoff while + * the inode is waiting on the inactive list. We'll reattach + * them (if needed) when inactivating the inode. + */ + xfs_qm_dqdetach(ip); +} + /* * Returns true if we need to update the on-disk metadata before we can free * the memory used by this inode. Updates include freeing post-eof @@ -1933,6 +1989,16 @@ xfs_inactive( if (mp->m_flags & XFS_MOUNT_RDONLY) return; + /* + * Re-attach dquots prior to freeing EOF blocks or CoW staging extents. + * We dropped the dquot prior to inactivation (because quotaoff can't + * resurrect inactive inodes to force-drop the dquot) so we /must/ + * do this before touching any block mappings. + */ + error = xfs_qm_dqattach(ip); + if (error) + return; + /* Try to clean out the cow blocks if there are any. 
*/ if (xfs_inode_has_cow_data(ip)) xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); @@ -1958,10 +2024,6 @@ xfs_inactive( ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0)) truncate = 1; - error = xfs_qm_dqattach(ip); - if (error) - return; - if (S_ISLNK(VFS_I(ip)->i_mode)) error = xfs_inactive_symlink(ip); else if (truncate) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index cacbbbfb2a81..fa178a68b389 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -214,6 +214,7 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */ #define __XFS_INEW_BIT 3 /* inode has just been allocated */ #define XFS_INEW (1 << __XFS_INEW_BIT) +#define XFS_NEED_INACTIVE (1 << 4) /* see XFS_INACTIVATING below */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */ #define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */ @@ -231,13 +232,23 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) #define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */ /* + * If we need to update on-disk metadata before this IRECLAIMABLE inode can be + * freed, then NEED_INACTIVE will be set. Once we start the updates, the + * INACTIVATING bit will be set to keep iget away from this inode. After the + * inactivation completes, both flags will be cleared and the inode is a + * plain old IRECLAIMABLE inode. + */ +#define XFS_INACTIVATING (1 << 13) + +/* * Per-lifetime flags need to be reset when re-using a reclaimable inode during * inode lookup. This prevents unintended behaviour on the new inode from * ocurring. */ #define XFS_IRECLAIM_RESET_FLAGS \ (XFS_IRECLAIMABLE | XFS_IRECLAIM | \ - XFS_IDIRTY_RELEASE | XFS_ITRUNCATED) + XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \ + XFS_INACTIVATING) /* * Synchronize processes attempting to flush the in-core inode back to disk. 
@@ -501,6 +512,8 @@ extern struct kmem_zone *xfs_inode_zone; bool xfs_inode_verify_forks(struct xfs_inode *ip); int xfs_has_eofblocks(struct xfs_inode *ip, bool *has); bool xfs_inode_needs_inactivation(struct xfs_inode *ip); +void xfs_inode_inactivation_prep(struct xfs_inode *ip); +void xfs_inode_inactivation_cleanup(struct xfs_inode *ip); int xfs_iunlink_init(struct xfs_perag *pag); void xfs_iunlink_destroy(struct xfs_perag *pag); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 27c93b5f029d..95245761709b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -177,6 +177,7 @@ xfs_iomap_write_direct( int lockmode; int bmapi_flags = XFS_BMAPI_PREALLOC; uint tflags = 0; + bool flush_inactive = true; rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); @@ -249,8 +250,14 @@ xfs_iomap_write_direct( resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; } } +start_over: error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, resrtextents, tflags, &tp); + if (error == -ENOSPC && flush_inactive) { + flush_inactive = false; + xfs_inactive_force(mp); + goto start_over; + } if (error) return error; @@ -449,6 +456,7 @@ xfs_iomap_prealloc_size( alloc_blocks); freesp = percpu_counter_read_positive(&mp->m_fdblocks); + freesp += percpu_counter_read_positive(&mp->m_dinactive); if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { shift = 2; if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index f5fb8885662f..248c25bb4e67 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -5178,6 +5178,13 @@ xlog_recover_process_iunlinks( } xfs_buf_rele(agibp); } + + /* + * Now that we've put all the iunlink inodes on the lru, let's make + * sure that we perform all the on-disk metadata updates to actually + * free those inodes. 
+ */ + xfs_inactive_force(mp); } STATIC int diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 9a181f7ca1d5..1bc5c27a9425 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1073,6 +1073,7 @@ xfs_mountfs( * qm_unmount_quotas and therefore rely on qm_unmount to release the * quota inodes. */ + xfs_inactive_deactivate(mp); cancel_delayed_work_sync(&mp->m_reclaim_work); xfs_reclaim_inodes(mp, SYNC_WAIT); out_log_dealloc: @@ -1111,7 +1112,15 @@ xfs_unmountfs( uint64_t resblks; int error; + /* + * Perform all on-disk metadata updates required to inactivate inodes. + * Since this can involve finobt updates, do it now before we lose the + * per-AG space reservations. + */ + xfs_inactive_force(mp); + xfs_icache_disable_reclaim(mp); + xfs_fs_unreserve_ag_blocks(mp); xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); @@ -1161,6 +1170,13 @@ xfs_unmountfs( xfs_qm_unmount(mp); /* + * Kick off inode inactivation again to push the metadata inodes past + * INACTIVE into RECLAIM. We also have to deactivate the inactivation + * worker. + */ + xfs_inactive_deactivate(mp); + + /* * Unreserve any blocks we have so that when we unmount we don't account * the reserved free space as used. 
This is really only necessary for * lazy superblock counting because it trusts the incore superblock diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1844ce1053bb..1ca871a5c349 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -153,6 +153,7 @@ typedef struct xfs_mount { trimming */ struct delayed_work m_cowblocks_work; /* background cow blocks trimming */ + struct delayed_work m_inactive_work; /* background inode inactive */ bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ @@ -167,6 +168,7 @@ typedef struct xfs_mount { struct workqueue_struct *m_unwritten_workqueue; struct workqueue_struct *m_cil_workqueue; struct workqueue_struct *m_reclaim_workqueue; + struct workqueue_struct *m_inactive_workqueue; struct workqueue_struct *m_log_workqueue; struct workqueue_struct *m_eofblocks_workqueue; struct workqueue_struct *m_sync_workqueue; @@ -372,7 +374,8 @@ typedef struct xfs_perag { spinlock_t pag_ici_lock; /* incore inode cache lock */ struct radix_tree_root pag_ici_root; /* incore inode cache root */ - int pag_ici_reclaimable; /* reclaimable inodes */ + unsigned int pag_ici_reclaimable; /* reclaimable inodes */ + unsigned int pag_ici_inactive; /* inactive inodes */ struct mutex pag_ici_reclaim_lock; /* serialisation point */ unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index ba058b8da8a8..d96f146e3fe6 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -48,6 +48,12 @@ xfs_qm_scall_quotaoff( xfs_qoff_logitem_t *qoffstart; /* + * Clean up the inactive list before we turn quota off, to reduce the + * amount of quotaoff work we have to do with the mutex held. + */ + xfs_inactive_force(mp); + + /* * No file system can have quotas enabled on disk but not in core. * Note that quota utilities (like quotaoff) _expect_ * errno == -EEXIST here. 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d8aadf5db10f..1953fb76df6f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -874,8 +874,15 @@ xfs_init_mount_workqueues( if (!mp->m_sync_workqueue) goto out_destroy_eofb; + mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s", + WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + if (!mp->m_inactive_workqueue) + goto out_destroy_sync; + return 0; +out_destroy_sync: + destroy_workqueue(mp->m_inactive_workqueue); out_destroy_eofb: destroy_workqueue(mp->m_eofblocks_workqueue); out_destroy_log: @@ -898,6 +905,7 @@ STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { + destroy_workqueue(mp->m_inactive_workqueue); destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_eofblocks_workqueue); destroy_workqueue(mp->m_log_workqueue); @@ -970,28 +978,34 @@ xfs_fs_destroy_inode( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + bool need_inactive; trace_xfs_destroy_inode(ip); ASSERT(!rwsem_is_locked(&inode->i_rwsem)); - XFS_STATS_INC(ip->i_mount, vn_rele); - XFS_STATS_INC(ip->i_mount, vn_remove); - - xfs_inactive(ip); - - if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) { + XFS_STATS_INC(mp, vn_rele); + XFS_STATS_INC(mp, vn_remove); + + need_inactive = xfs_inode_needs_inactivation(ip); + if (need_inactive) { + trace_xfs_inode_set_need_inactive(ip); + xfs_inode_inactivation_prep(ip); + } else if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) { xfs_check_delalloc(ip, XFS_DATA_FORK); xfs_check_delalloc(ip, XFS_COW_FORK); ASSERT(0); } - - XFS_STATS_INC(ip->i_mount, vn_reclaim); + XFS_STATS_INC(mp, vn_reclaim); + trace_xfs_inode_set_reclaimable(ip); /* * We should never get here with one of the reclaim flags already set. 
*/ ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_NEED_INACTIVE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_INACTIVATING)); /* * We always use background reclaim here because even if the @@ -1000,7 +1014,7 @@ xfs_fs_destroy_inode( * this more efficiently than we can here, so simply let background * reclaim tear down all inodes. */ - xfs_inode_set_reclaim_tag(ip); + xfs_inode_set_reclaim_tag(ip, need_inactive); } static void @@ -1234,6 +1248,12 @@ xfs_quiesce_attr( while (atomic_read(&mp->m_active_trans) > 0) delay(100); + /* + * Perform all on-disk metadata updates required to inactivate inodes. + * This has to be done before we force the log out. + */ + xfs_inactive_deactivate(mp); + /* force the log to unpin objects from the now complete transactions */ xfs_log_force(mp, XFS_LOG_SYNC); @@ -1407,6 +1427,13 @@ xfs_fs_remount( return error; } + /* + * Perform all on-disk metadata updates required to inactivate + * inodes. Since this can involve finobt updates, do it now + * before we lose the per-AG space reservations. + */ + xfs_inactive_force(mp); + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { @@ -1460,6 +1487,18 @@ xfs_fs_unfreeze( return 0; } +/* + * Before we get to stage 1 of a freeze, force all the inactivation work so + * that there's less work to do if we crash during the freeze. 
+ */ +STATIC int +xfs_fs_freeze_super( + struct super_block *sb) +{ + xfs_inactive_force(XFS_M(sb)); + return freeze_super(sb); +} + STATIC int xfs_fs_show_options( struct seq_file *m, @@ -1622,6 +1661,7 @@ xfs_mount_alloc( INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); + INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker); mp->m_kobj.kobject.kset = xfs_kset; return mp; } @@ -1882,6 +1922,7 @@ static const struct super_operations xfs_super_operations = { .show_options = xfs_fs_show_options, .nr_cached_objects = xfs_fs_nr_cached_objects, .free_cached_objects = xfs_fs_free_cached_objects, + .freeze_super = xfs_fs_freeze_super, }; static struct file_system_type xfs_fs_type = { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 140f2ff9b89a..2bed31cf351c 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -588,14 +588,17 @@ DECLARE_EVENT_CLASS(xfs_inode_class, TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) + __field(unsigned long, iflags) ), TP_fast_assign( __entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->ino = ip->i_ino; + __entry->iflags = ip->i_flags; ), - TP_printk("dev %d:%d ino 0x%llx", + TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx", MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino) + __entry->ino, + __entry->iflags) ) #define DEFINE_INODE_EVENT(name) \ @@ -639,6 +642,10 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); +DEFINE_INODE_EVENT(xfs_inode_set_reclaimable); +DEFINE_INODE_EVENT(xfs_inode_reclaiming); +DEFINE_INODE_EVENT(xfs_inode_set_need_inactive); +DEFINE_INODE_EVENT(xfs_inode_inactivating); /* * ftrace's __print_symbolic requires that all enum values be wrapped in the |