diff options
author | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-22 13:33:44 +1100 |
---|---|---|
committer | Stephen Rothwell <sfr@canb.auug.org.au> | 2008-02-22 13:33:44 +1100 |
commit | 90c6fd184c8e56091e08119b97fdde1c0d133a44 (patch) | |
tree | 52700522c82540e8d2c883a0c505b55840a5cb12 /fs | |
parent | 867945496e8ad0cfe4e85aa1a00d1377efaeeafd (diff) | |
parent | 5e9ff1b89bd0c6ede15f7d9cd0818db07eb84d7f (diff) |
Merge commit 'xfs/master'
Conflicts:
fs/xfs/Makefile-linux-2.6
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_linux.h | 1 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 3 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_vnode.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_iget.c | 49 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 655 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.h | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 47 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_rename.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_vfsops.c | 25 | ||||
-rw-r--r-- | fs/xfs/xfs_vnodeops.c | 113 |
16 files changed, 402 insertions, 547 deletions
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 3ca39c4e5d2a..e5143323e71f 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -99,7 +99,6 @@ /* * Feature macros (disable/enable) */ -#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */ #define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */ #ifdef CONFIG_SMP #define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 21dfc9da235e..df24c6fe4ce4 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -896,7 +896,8 @@ xfs_fs_write_inode( struct inode *inode, int sync) { - int error = 0, flags = FLUSH_INODE; + int error = 0; + int flags = 0; xfs_itrace_entry(XFS_I(inode)); if (sync) { diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index b5ea418693b1..f200e0244082 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -73,12 +73,9 @@ typedef enum bhv_vrwlock { #define IO_INVIS 0x00020 /* don't update inode timestamps */ /* - * Flags for vop_iflush call + * Flags for xfs_inode_flush */ #define FLUSH_SYNC 1 /* wait for flush to complete */ -#define FLUSH_INODE 2 /* flush the inode itself */ -#define FLUSH_LOG 4 /* force the last log entry for - * this inode out to disk */ /* * Flush/Invalidate options for vop_toss/flush/flushinval_pages. diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1c0a5a585a82..759b75b90b59 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5869,6 +5869,10 @@ xfs_getbmap( /* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */ error = xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); + if (error) { + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return error; + } } ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f01b07687faf..a959e3336931 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -78,7 +78,6 @@ xfs_iget_core( xfs_inode_t *ip; xfs_inode_t *iq; int error; - xfs_icluster_t *icl, *new_icl = NULL; unsigned long first_index, mask; xfs_perag_t *pag; xfs_agino_t agino; @@ -229,11 +228,9 @@ finish_inode: } /* - * This is a bit messy - we preallocate everything we _might_ - * need before we pick up the ici lock. That way we don't have to - * juggle locks and go all the way back to the start. + * Preload the radix tree so we can insert safely under the + * write spinlock. */ - new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP); if (radix_tree_preload(GFP_KERNEL)) { delay(1); goto again; @@ -241,17 +238,6 @@ finish_inode: mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); first_index = agino & mask; write_lock(&pag->pag_ici_lock); - - /* - * Find the cluster if it exists - */ - icl = NULL; - if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq, - first_index, 1)) { - if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index) - icl = iq->i_cluster; - } - /* * insert the new inode */ @@ -266,30 +252,13 @@ finish_inode: } /* - * These values _must_ be set before releasing ihlock! + * These values _must_ be set before releasing the radix tree lock! */ ip->i_udquot = ip->i_gdquot = NULL; xfs_iflags_set(ip, XFS_INEW); - ASSERT(ip->i_cluster == NULL); - - if (!icl) { - spin_lock_init(&new_icl->icl_lock); - INIT_HLIST_HEAD(&new_icl->icl_inodes); - icl = new_icl; - new_icl = NULL; - } else { - ASSERT(!hlist_empty(&icl->icl_inodes)); - } - spin_lock(&icl->icl_lock); - hlist_add_head(&ip->i_cnode, &icl->icl_inodes); - ip->i_cluster = icl; - spin_unlock(&icl->icl_lock); - write_unlock(&pag->pag_ici_lock); radix_tree_preload_end(); - if (new_icl) - kmem_zone_free(xfs_icluster_zone, new_icl); /* * Link ip to its mount and thread it on the mount's inode list. @@ -528,18 +497,6 @@ xfs_iextract( xfs_put_perag(mp, pag); /* - * Remove from cluster list - */ - mp = ip->i_mount; - spin_lock(&ip->i_cluster->icl_lock); - hlist_del(&ip->i_cnode); - spin_unlock(&ip->i_cluster->icl_lock); - - /* was last inode in cluster? */ - if (hlist_empty(&ip->i_cluster->icl_inodes)) - kmem_zone_free(xfs_icluster_zone, ip->i_cluster); - - /* * Remove from mount's inode list. */ XFS_MOUNT_ILOCK(mp); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a550546a7083..4e23a9bd5106 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -55,7 +55,6 @@ kmem_zone_t *xfs_ifork_zone; kmem_zone_t *xfs_inode_zone; -kmem_zone_t *xfs_icluster_zone; /* * Used in xfs_itruncate(). This is the maximum number of extents @@ -126,6 +125,90 @@ xfs_inobp_check( #endif /* + * Find the buffer associated with the given inode map + * We do basic validation checks on the buffer once it has been + * retrieved from disk. + */ +STATIC int +xfs_imap_to_bp( + xfs_mount_t *mp, + xfs_trans_t *tp, + xfs_imap_t *imap, + xfs_buf_t **bpp, + uint buf_flags, + uint imap_flags) +{ + int error; + int i; + int ni; + xfs_buf_t *bp; + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, + (int)imap->im_len, buf_flags, &bp); + if (error) { + if (error != EAGAIN) { + cmn_err(CE_WARN, + "xfs_imap_to_bp: xfs_trans_read_buf()returned " + "an error %d on %s. Returning error.", + error, mp->m_fsname); + } else { + ASSERT(buf_flags & XFS_BUF_TRYLOCK); + } + return error; + } + + /* + * Validate the magic number and version of every inode in the buffer + * (if DEBUG kernel) or the first inode in the buffer, otherwise. + */ +#ifdef DEBUG + ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; +#else /* usual case */ + ni = 1; +#endif + + for (i = 0; i < ni; i++) { + int di_ok; + xfs_dinode_t *dip; + + dip = (xfs_dinode_t *)xfs_buf_offset(bp, + (i << mp->m_sb.sb_inodelog)); + di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && + XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); + if (unlikely(XFS_TEST_ERROR(!di_ok, mp, + XFS_ERRTAG_ITOBP_INOTOBP, + XFS_RANDOM_ITOBP_INOTOBP))) { + if (imap_flags & XFS_IMAP_BULKSTAT) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EINVAL); + } + XFS_CORRUPTION_ERROR("xfs_imap_to_bp", + XFS_ERRLEVEL_HIGH, mp, dip); +#ifdef DEBUG + cmn_err(CE_PANIC, + "Device %s - bad inode magic/vsn " + "daddr %lld #%d (magic=%x)", + XFS_BUFTARG_NAME(mp->m_ddev_targp), + (unsigned long long)imap->im_blkno, i, + be16_to_cpu(dip->di_core.di_magic)); +#endif + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EFSCORRUPTED); + } + } + + xfs_inobp_check(mp, bp); + + /* + * Mark the buffer as an inode buffer now that it looks good + */ + XFS_BUF_SET_VTYPE(bp, B_FS_INO); + + *bpp = bp; + return 0; +} + +/* * This routine is called to map an inode number within a file * system to the buffer containing the on-disk version of the * inode. It returns a pointer to the buffer containing the @@ -147,72 +230,19 @@ xfs_inotobp( xfs_buf_t **bpp, int *offset) { - int di_ok; xfs_imap_t imap; xfs_buf_t *bp; int error; - xfs_dinode_t *dip; - /* - * Call the space management code to find the location of the - * inode on disk. - */ imap.im_blkno = 0; error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); - if (error != 0) { - cmn_err(CE_WARN, - "xfs_inotobp: xfs_imap() returned an " - "error %d on %s. Returning error.", error, mp->m_fsname); + if (error) return error; - } - - /* - * If the inode number maps to a block outside the bounds of the - * file system then return NULL rather than calling read_buf - * and panicing when we get an error from the driver. - */ - if ((imap.im_blkno + imap.im_len) > - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { - cmn_err(CE_WARN, - "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " - "of the file system %s. Returning EINVAL.", - (unsigned long long)imap.im_blkno, - imap.im_len, mp->m_fsname); - return XFS_ERROR(EINVAL); - } - /* - * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will - * default to just a read_buf() call. - */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, - (int)imap.im_len, XFS_BUF_LOCK, &bp); - - if (error) { - cmn_err(CE_WARN, - "xfs_inotobp: xfs_trans_read_buf() returned an " - "error %d on %s. Returning error.", error, mp->m_fsname); + error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); + if (error) return error; - } - dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); - di_ok = - be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { - XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); - xfs_trans_brelse(tp, bp); - cmn_err(CE_WARN, - "xfs_inotobp: XFS_TEST_ERROR() returned an " - "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); - return XFS_ERROR(EFSCORRUPTED); - } - - xfs_inobp_check(mp, bp); - /* - * Set *dipp to point to the on-disk inode in the buffer. - */ *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); *bpp = bp; *offset = imap.im_boffset; @@ -248,46 +278,21 @@ xfs_itobp( xfs_dinode_t **dipp, xfs_buf_t **bpp, xfs_daddr_t bno, - uint imap_flags) + uint imap_flags, + uint buf_flags) { xfs_imap_t imap; xfs_buf_t *bp; int error; - int i; - int ni; if (ip->i_blkno == (xfs_daddr_t)0) { - /* - * Call the space management code to find the location of the - * inode on disk. - */ imap.im_blkno = bno; - if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, - XFS_IMAP_LOOKUP | imap_flags))) + error = xfs_imap(mp, tp, ip->i_ino, &imap, + XFS_IMAP_LOOKUP | imap_flags); + if (error) return error; /* - * If the inode number maps to a block outside the bounds - * of the file system then return NULL rather than calling - * read_buf and panicing when we get an error from the - * driver. - */ - if ((imap.im_blkno + imap.im_len) > - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " - "(imap.im_blkno (0x%llx) " - "+ imap.im_len (0x%llx)) > " - " XFS_FSB_TO_BB(mp, " - "mp->m_sb.sb_dblocks) (0x%llx)", - (unsigned long long) imap.im_blkno, - (unsigned long long) imap.im_len, - XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); -#endif /* DEBUG */ - return XFS_ERROR(EINVAL); - } - - /* * Fill in the fields in the inode that will be used to * map the inode to its buffer from now on. */ @@ -305,76 +310,17 @@ xfs_itobp( } ASSERT(bno == 0 || bno == imap.im_blkno); - /* - * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will - * default to just a read_buf() call. - */ - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, - (int)imap.im_len, XFS_BUF_LOCK, &bp); - if (error) { -#ifdef DEBUG - xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " - "xfs_trans_read_buf() returned error %d, " - "imap.im_blkno 0x%llx, imap.im_len 0x%llx", - error, (unsigned long long) imap.im_blkno, - (unsigned long long) imap.im_len); -#endif /* DEBUG */ + error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); + if (error) return error; - } - /* - * Validate the magic number and version of every inode in the buffer - * (if DEBUG kernel) or the first inode in the buffer, otherwise. - * No validation is done here in userspace (xfs_repair). - */ -#if !defined(__KERNEL__) - ni = 0; -#elif defined(DEBUG) - ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; -#else /* usual case */ - ni = 1; -#endif - - for (i = 0; i < ni; i++) { - int di_ok; - xfs_dinode_t *dip; - - dip = (xfs_dinode_t *)xfs_buf_offset(bp, - (i << mp->m_sb.sb_inodelog)); - di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && - XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); - if (unlikely(XFS_TEST_ERROR(!di_ok, mp, - XFS_ERRTAG_ITOBP_INOTOBP, - XFS_RANDOM_ITOBP_INOTOBP))) { - if (imap_flags & XFS_IMAP_BULKSTAT) { - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EINVAL); - } -#ifdef DEBUG - cmn_err(CE_ALERT, - "Device %s - bad inode magic/vsn " - "daddr %lld #%d (magic=%x)", - XFS_BUFTARG_NAME(mp->m_ddev_targp), - (unsigned long long)imap.im_blkno, i, - be16_to_cpu(dip->di_core.di_magic)); -#endif - XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, - mp, dip); - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); - } + if (!bp) { + ASSERT(buf_flags & XFS_BUF_TRYLOCK); + ASSERT(tp == NULL); + *bpp = NULL; + return EAGAIN; } - xfs_inobp_check(mp, bp); - - /* - * Mark the buffer as an inode buffer now that it looks good - */ - XFS_BUF_SET_VTYPE(bp, B_FS_INO); - - /* - * Set *dipp to point to the on-disk inode in the buffer. - */ *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); *bpp = bp; return 0; @@ -878,7 +824,7 @@ xfs_iread( * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will * know that this is a new incore inode. */ - error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); + error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); if (error) { kmem_zone_free(xfs_inode_zone, ip); return error; @@ -1967,7 +1913,7 @@ xfs_iunlink( * Here we put the head pointer into our next pointer, * and then we fall through to point the head at us. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); if (error) return error; @@ -2075,7 +2021,7 @@ xfs_iunlink_remove( * of dealing with the buffer when there is no need to * change it. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2137,7 +2083,7 @@ xfs_iunlink_remove( * Now last_ibp points to the buffer previous to us on * the unlinked list. Pull us from the list. */ - error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); + error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); if (error) { cmn_err(CE_WARN, "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", @@ -2172,13 +2118,6 @@ xfs_iunlink_remove( return 0; } -STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) -{ - return (((ip->i_itemp == NULL) || - !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && - (ip->i_update_core == 0)); -} - STATIC void xfs_ifree_cluster( xfs_inode_t *free_ip, @@ -2400,7 +2339,7 @@ xfs_ifree( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); + error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); if (error) return error; @@ -2678,14 +2617,31 @@ xfs_imap( fsbno = imap->im_blkno ? XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); - if (error != 0) { + if (error) return error; - } + imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); imap->im_len = XFS_FSB_TO_BB(mp, len); imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); imap->im_ioffset = (ushort)off; imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); + + /* + * If the inode number maps to a block outside the bounds + * of the file system then return NULL rather than calling + * read_buf and panicing when we get an error from the + * driver. + */ + if ((imap->im_blkno + imap->im_len) > + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { + xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " + "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " + " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", + (unsigned long long) imap->im_blkno, + (unsigned long long) imap->im_len, + XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); + return EINVAL; + } return 0; } @@ -2826,38 +2782,41 @@ xfs_iunpin( } /* - * This is called to wait for the given inode to be unpinned. - * It will sleep until this happens. The caller must have the - * inode locked in at least shared mode so that the buffer cannot - * be subsequently pinned once someone is waiting for it to be - * unpinned. + * This is called to unpin an inode. It can be directed to wait or to return + * immediately without waiting for the inode to be unpinned. The caller must + * have the inode locked in at least shared mode so that the buffer cannot be + * subsequently pinned once someone is waiting for it to be unpinned. */ STATIC void -xfs_iunpin_wait( - xfs_inode_t *ip) +__xfs_iunpin_wait( + xfs_inode_t *ip, + int wait) { - xfs_inode_log_item_t *iip; - xfs_lsn_t lsn; + xfs_inode_log_item_t *iip = ip->i_itemp; ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); - - if (atomic_read(&ip->i_pincount) == 0) { + if (atomic_read(&ip->i_pincount) == 0) return; - } - iip = ip->i_itemp; - if (iip && iip->ili_last_lsn) { - lsn = iip->ili_last_lsn; - } else { - lsn = (xfs_lsn_t)0; - } + /* Give the log a push to start the unpinning I/O */ + xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? + iip->ili_last_lsn : 0, XFS_LOG_FORCE); + if (wait) + wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); +} - /* - * Give the log a push so we don't wait here too long. - */ - xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); +static inline void +xfs_iunpin_wait( + xfs_inode_t *ip) +{ + __xfs_iunpin_wait(ip, 1); +} - wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); +static inline void +xfs_iunpin_nowait( + xfs_inode_t *ip) +{ + __xfs_iunpin_wait(ip, 0); } @@ -3027,6 +2986,145 @@ xfs_iflush_fork( return 0; } +STATIC int +xfs_iflush_cluster( + xfs_inode_t *ip, + xfs_buf_t *bp) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + unsigned long first_index, mask; + int ilist_size; + xfs_inode_t **ilist; + xfs_inode_t *iq; + int nr_found; + int clcount = 0; + int bufwasdelwri; + int i; + + ASSERT(pag->pagi_inodeok); + ASSERT(pag->pag_ici_init); + + ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); + ilist = kmem_alloc(ilist_size, KM_MAYFAIL); + if (!ilist) + return 0; + + mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; + read_lock(&pag->pag_ici_lock); + /* really need a gang lookup range call here */ + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, + first_index, + XFS_INODE_CLUSTER_SIZE(mp)); + if (nr_found == 0) + goto out_free; + + for (i = 0; i < nr_found; i++) { + iq = ilist[i]; + if (iq == ip) + continue; + /* if the inode lies outside this cluster, we're done. */ + if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) + break; + /* + * Do an un-protected check to see if the inode is dirty and + * is a candidate for flushing. These checks will be repeated + * later after the appropriate locks are acquired. + */ + if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) + continue; + + /* + * Try to get locks. If any are unavailable or it is pinned, + * then this inode cannot be flushed and is skipped. + */ + + if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) + continue; + if (!xfs_iflock_nowait(iq)) { + xfs_iunlock(iq, XFS_ILOCK_SHARED); + continue; + } + if (xfs_ipincount(iq)) { + xfs_ifunlock(iq); + xfs_iunlock(iq, XFS_ILOCK_SHARED); + continue; + } + + /* + * arriving here means that this inode can be flushed. First + * re-check that it's dirty before flushing. + */ + if (!xfs_inode_clean(iq)) { + int error; + error = xfs_iflush_int(iq, bp); + if (error) { + xfs_iunlock(iq, XFS_ILOCK_SHARED); + goto cluster_corrupt_out; + } + clcount++; + } else { + xfs_ifunlock(iq); + } + xfs_iunlock(iq, XFS_ILOCK_SHARED); + } + + if (clcount) { + XFS_STATS_INC(xs_icluster_flushcnt); + XFS_STATS_ADD(xs_icluster_flushinode, clcount); + } + +out_free: + read_unlock(&pag->pag_ici_lock); + kmem_free(ilist, ilist_size); + return 0; + + +cluster_corrupt_out: + /* + * Corruption detected in the clustering loop. Invalidate the + * inode buffer and shut down the filesystem. + */ + read_unlock(&pag->pag_ici_lock); + /* + * Clean up the buffer. If it was B_DELWRI, just release it -- + * brelse can handle it with no problems. If not, shut down the + * filesystem before releasing the buffer. + */ + bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); + if (bufwasdelwri) + xfs_buf_relse(bp); + + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + + if (!bufwasdelwri) { + /* + * Just like incore_relse: if we have b_iodone functions, + * mark the buffer as an error and call them. Otherwise + * mark it as stale and brelse. + */ + if (XFS_BUF_IODONE_FUNC(bp)) { + XFS_BUF_CLR_BDSTRAT_FUNC(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + XFS_BUF_SHUT(bp); + XFS_BUF_ERROR(bp,EIO); + xfs_biodone(bp); + } else { + XFS_BUF_STALE(bp); + xfs_buf_relse(bp); + } + } + + /* + * Unlocks the flush lock + */ + xfs_iflush_abort(iq); + kmem_free(ilist, ilist_size); + return XFS_ERROR(EFSCORRUPTED); +} + /* * xfs_iflush() will write a modified inode's changes out to the * inode's on disk home. The caller must have the inode lock held @@ -3046,11 +3144,7 @@ xfs_iflush( xfs_dinode_t *dip; xfs_mount_t *mp; int error; - /* REFERENCED */ - xfs_inode_t *iq; - int clcount; /* count of inodes clustered */ - int bufwasdelwri; - struct hlist_node *entry; + int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; XFS_STATS_INC(xs_iflush_count); @@ -3067,8 +3161,7 @@ xfs_iflush( * If the inode isn't dirty, then just release the inode * flush lock and do nothing. */ - if ((ip->i_update_core == 0) && - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { + if (xfs_inode_clean(ip)) { ASSERT((iip != NULL) ? !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); xfs_ifunlock(ip); @@ -3076,11 +3169,21 @@ xfs_iflush( } /* - * We can't flush the inode until it is unpinned, so - * wait for it. We know noone new can pin it, because - * we are holding the inode lock shared and you need - * to hold it exclusively to pin the inode. + * We can't flush the inode until it is unpinned, so wait for it if we + * are allowed to block. We know noone new can pin it, because we are + * holding the inode lock shared and you need to hold it exclusively to + * pin the inode. + * + * If we are not allowed to block, force the log out asynchronously so + * that when we come back the inode will be unpinned. If other inodes + * in the same cluster are dirty, they will probably write the inode + * out for us if they occur after the log force completes. */ + if (noblock && xfs_ipincount(ip)) { + xfs_iunpin_nowait(ip); + xfs_ifunlock(ip); + return EAGAIN; + } xfs_iunpin_wait(ip); /* @@ -3097,15 +3200,6 @@ xfs_iflush( } /* - * Get the buffer containing the on-disk inode. - */ - error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); - if (error) { - xfs_ifunlock(ip); - return error; - } - - /* * Decide how buffer will be flushed out. This is done before * the call to xfs_iflush_int because this field is zeroed by it. */ @@ -3121,6 +3215,7 @@ xfs_iflush( case XFS_IFLUSH_DELWRI_ELSE_SYNC: flags = 0; break; + case XFS_IFLUSH_ASYNC_NOBLOCK: case XFS_IFLUSH_ASYNC: case XFS_IFLUSH_DELWRI_ELSE_ASYNC: flags = INT_ASYNC; @@ -3140,6 +3235,7 @@ xfs_iflush( case XFS_IFLUSH_DELWRI: flags = INT_DELWRI; break; + case XFS_IFLUSH_ASYNC_NOBLOCK: case XFS_IFLUSH_ASYNC: flags = INT_ASYNC; break; @@ -3154,89 +3250,36 @@ xfs_iflush( } /* - * First flush out the inode that xfs_iflush was called with. + * Get the buffer containing the on-disk inode. */ - error = xfs_iflush_int(ip, bp); - if (error) { - goto corrupt_out; + error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, + noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); + if (error || !bp) { + xfs_ifunlock(ip); + return error; } /* - * inode clustering: - * see if other inodes can be gathered into this write + * First flush out the inode that xfs_iflush was called with. */ - spin_lock(&ip->i_cluster->icl_lock); - ip->i_cluster->icl_buf = bp; - - clcount = 0; - hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { - if (iq == ip) - continue; - - /* - * Do an un-protected check to see if the inode is dirty and - * is a candidate for flushing. These checks will be repeated - * later after the appropriate locks are acquired. - */ - iip = iq->i_itemp; - if ((iq->i_update_core == 0) && - ((iip == NULL) || - !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && - xfs_ipincount(iq) == 0) { - continue; - } - - /* - * Try to get locks. If any are unavailable, - * then this inode cannot be flushed and is skipped. - */ - - /* get inode locks (just i_lock) */ - if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { - /* get inode flush lock */ - if (xfs_iflock_nowait(iq)) { - /* check if pinned */ - if (xfs_ipincount(iq) == 0) { - /* arriving here means that - * this inode can be flushed. - * first re-check that it's - * dirty - */ - iip = iq->i_itemp; - if ((iq->i_update_core != 0)|| - ((iip != NULL) && - (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { - clcount++; - error = xfs_iflush_int(iq, bp); - if (error) { - xfs_iunlock(iq, - XFS_ILOCK_SHARED); - goto cluster_corrupt_out; - } - } else { - xfs_ifunlock(iq); - } - } else { - xfs_ifunlock(iq); - } - } - xfs_iunlock(iq, XFS_ILOCK_SHARED); - } - } - spin_unlock(&ip->i_cluster->icl_lock); - - if (clcount) { - XFS_STATS_INC(xs_icluster_flushcnt); - XFS_STATS_ADD(xs_icluster_flushinode, clcount); - } + error = xfs_iflush_int(ip, bp); + if (error) + goto corrupt_out; /* - * If the buffer is pinned then push on the log so we won't + * If the buffer is pinned then push on the log now so we won't * get stuck waiting in the write for too long. */ - if (XFS_BUF_ISPINNED(bp)){ + if (XFS_BUF_ISPINNED(bp)) xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); - } + + /* + * inode clustering: + * see if other inodes can be gathered into this write + */ + error = xfs_iflush_cluster(ip, bp); + if (error) + goto cluster_corrupt_out; if (flags & INT_DELWRI) { xfs_bdwrite(mp, bp); @@ -3250,52 +3293,11 @@ xfs_iflush( corrupt_out: xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - xfs_iflush_abort(ip); - /* - * Unlocks the flush lock - */ - return XFS_ERROR(EFSCORRUPTED); - cluster_corrupt_out: - /* Corruption detected in the clustering loop. Invalidate the - * inode buffer and shut down the filesystem. - */ - spin_unlock(&ip->i_cluster->icl_lock); - - /* - * Clean up the buffer. If it was B_DELWRI, just release it -- - * brelse can handle it with no problems. If not, shut down the - * filesystem before releasing the buffer. - */ - if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { - xfs_buf_relse(bp); - } - - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - - if(!bufwasdelwri) { - /* - * Just like incore_relse: if we have b_iodone functions, - * mark the buffer as an error and call them. Otherwise - * mark it as stale and brelse. - */ - if (XFS_BUF_IODONE_FUNC(bp)) { - XFS_BUF_CLR_BDSTRAT_FUNC(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - XFS_BUF_SHUT(bp); - XFS_BUF_ERROR(bp,EIO); - xfs_biodone(bp); - } else { - XFS_BUF_STALE(bp); - xfs_buf_relse(bp); - } - } - - xfs_iflush_abort(iq); /* * Unlocks the flush lock */ + xfs_iflush_abort(ip); return XFS_ERROR(EFSCORRUPTED); } @@ -3325,8 +3327,7 @@ xfs_iflush_int( * If the inode isn't dirty, then just release the inode * flush lock and do nothing. */ - if ((ip->i_update_core == 0) && - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { + if (xfs_inode_clean(ip)) { xfs_ifunlock(ip); return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index bfcd72cbaeea..93c37697a72c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -133,19 +133,6 @@ typedef struct dm_attrs_s { } dm_attrs_t; /* - * This is the xfs inode cluster structure. This structure is used by - * xfs_iflush to find inodes that share a cluster and can be flushed to disk at - * the same time. - */ -typedef struct xfs_icluster { - struct hlist_head icl_inodes; /* list of inodes on cluster */ - xfs_daddr_t icl_blkno; /* starting block number of - * the cluster */ - struct xfs_buf *icl_buf; /* the inode buffer */ - spinlock_t icl_lock; /* inode list lock */ -} xfs_icluster_t; - -/* * This is the xfs in-core inode structure. * Most of the on-disk inode is embedded in the i_d field. * @@ -240,10 +227,6 @@ typedef struct xfs_inode { atomic_t i_pincount; /* inode pin count */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ spinlock_t i_flags_lock; /* inode i_flags lock */ -#ifdef HAVE_REFCACHE - struct xfs_inode **i_refcache; /* ptr to entry in ref cache */ - struct xfs_inode *i_release; /* inode to unref */ -#endif /* Miscellaneous state. */ unsigned short i_flags; /* see defined flags below */ unsigned char i_update_core; /* timestamps/size is dirty */ @@ -252,8 +235,6 @@ typedef struct xfs_inode { unsigned int i_delayed_blks; /* count of delay alloc blks */ xfs_icdinode_t i_d; /* most of ondisk inode */ - xfs_icluster_t *i_cluster; /* cluster list header */ - struct hlist_node i_cnode; /* cluster link node */ xfs_fsize_t i_size; /* in-memory size */ xfs_fsize_t i_new_size; /* size when write completes */ @@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) #define XFS_IFLUSH_SYNC 3 #define XFS_IFLUSH_ASYNC 4 #define XFS_IFLUSH_DELWRI 5 +#define XFS_IFLUSH_ASYNC_NOBLOCK 6 /* * Flags for xfs_itruncate_start(). @@ -515,7 +497,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int); */ int xfs_itobp(struct xfs_mount *, struct xfs_trans *, xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **, - xfs_daddr_t, uint); + xfs_daddr_t, uint, uint); int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); @@ -597,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #define xfs_inobp_check(mp, bp) #endif /* DEBUG */ -extern struct kmem_zone *xfs_icluster_zone; extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index bfe92ea17952..40513077ab36 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w) return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT); } +static inline int xfs_inode_clean(xfs_inode_t *ip) +{ + return (!ip->i_itemp || + !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && + !ip->i_update_core; +} + + #ifdef __KERNEL__ extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *); diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 658aab6b1bbf..38390e7381de 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -614,7 +614,8 @@ xfs_bulkstat( xfs_buf_relse(bp); error = xfs_itobp(mp, NULL, ip, &dip, &bp, bno, - XFS_IMAP_BULKSTAT); + XFS_IMAP_BULKSTAT, + XFS_BUF_LOCK); if (!error) clustidx = ip->i_boffset / mp->m_sb.sb_inodesize; kmem_zone_free(xfs_inode_zone, ip); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a75edca1860f..6439c89826dc 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_refcnt++; + atomic_inc(&iclog->ic_refcnt); spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); (void) xlog_state_release_iclog(log, iclog); @@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) */ spin_lock(&log->l_icloglock); iclog = log->l_iclog; - iclog->ic_refcnt++; + atomic_inc(&iclog->ic_refcnt); spin_unlock(&log->l_icloglock); xlog_state_want_sync(log, iclog); @@ -1405,7 +1405,7 @@ xlog_sync(xlog_t *log, int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb); XFS_STATS_INC(xs_log_writes); - ASSERT(iclog->ic_refcnt == 0); + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); /* Add for LR header */ count_init = log->l_iclog_hsize + iclog->ic_offset; @@ -2309,7 +2309,7 @@ xlog_state_done_syncing( ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || iclog->ic_state == XLOG_STATE_IOERROR); - ASSERT(iclog->ic_refcnt == 0); + ASSERT(atomic_read(&iclog->ic_refcnt) == 0); ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); @@ -2391,7 +2391,7 @@ restart: ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); head = &iclog->ic_header; - iclog->ic_refcnt++; /* prevents sync */ + atomic_inc(&iclog->ic_refcnt); /* prevents sync */ log_offset = iclog->ic_offset; /* On the 1st write to an iclog, figure out lsn. This works @@ -2423,12 +2423,12 @@ restart: xlog_state_switch_iclogs(log, iclog, iclog->ic_size); /* If I'm the only one writing to this iclog, sync it to disk */ - if (iclog->ic_refcnt == 1) { + if (atomic_read(&iclog->ic_refcnt) == 1) { spin_unlock(&log->l_icloglock); if ((error = xlog_state_release_iclog(log, iclog))) return error; } else { - iclog->ic_refcnt--; + atomic_dec(&iclog->ic_refcnt); spin_unlock(&log->l_icloglock); } goto restart; @@ -2813,33 +2813,35 @@ xlog_state_put_ticket(xlog_t *log, * */ STATIC int -xlog_state_release_iclog(xlog_t *log, - xlog_in_core_t *iclog) +xlog_state_release_iclog( + xlog_t *log, + xlog_in_core_t *iclog) { int sync = 0; /* do we sync? */ - xlog_assign_tail_lsn(log->l_mp); + if (iclog->ic_state & XLOG_STATE_IOERROR) + return XFS_ERROR(EIO); - spin_lock(&log->l_icloglock); + ASSERT(atomic_read(&iclog->ic_refcnt) > 0); + if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) + return 0; if (iclog->ic_state & XLOG_STATE_IOERROR) { spin_unlock(&log->l_icloglock); return XFS_ERROR(EIO); } - - ASSERT(iclog->ic_refcnt > 0); ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_WANT_SYNC); - if (--iclog->ic_refcnt == 0 && - iclog->ic_state == XLOG_STATE_WANT_SYNC) { + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { + /* update tail before writing to iclog */ + xlog_assign_tail_lsn(log->l_mp); sync++; iclog->ic_state = XLOG_STATE_SYNCING; iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn); xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn); /* cycle incremented when incrementing curr_block */ } - spin_unlock(&log->l_icloglock); /* @@ -2849,11 +2851,9 @@ xlog_state_release_iclog(xlog_t *log, * this iclog has consistent data, so we ignore IOERROR * flags after this point. */ - if (sync) { + if (sync) return xlog_sync(log, iclog); - } return 0; - } /* xlog_state_release_iclog */ @@ -2953,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) * previous iclog and go to sleep. */ if (iclog->ic_state == XLOG_STATE_DIRTY || - (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { + (atomic_read(&iclog->ic_refcnt) == 0 + && iclog->ic_offset == 0)) { iclog = iclog->ic_prev; if (iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY) @@ -2961,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed) else goto maybe_sleep; } else { - if (iclog->ic_refcnt == 0) { + if (atomic_read(&iclog->ic_refcnt) == 0) { /* We are the only one with access to this * iclog. Flush it out now. There should * be a roundoff of zero to show that someone * has already taken care of the roundoff from * the previous sync. */ - iclog->ic_refcnt++; + atomic_inc(&iclog->ic_refcnt); lsn = be64_to_cpu(iclog->ic_header.h_lsn); xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); @@ -3100,7 +3101,7 @@ try_again: already_slept = 1; goto try_again; } else { - iclog->ic_refcnt++; + atomic_inc(&iclog->ic_refcnt); xlog_state_switch_iclogs(log, iclog, 0); spin_unlock(&log->l_icloglock); if (xlog_state_release_iclog(log, iclog)) diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index e008233ee249..8662ce245c1f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields { #endif int ic_size; int ic_offset; - int ic_refcnt; + atomic_t ic_refcnt; int ic_bwritecnt; ushort_t ic_state; char *ic_datap; /* pointer to iclog data */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index b82d5d4d2462..d8a6d3089b16 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3214,7 +3214,8 @@ xlog_recover_process_iunlinks( * next inode in the bucket. */ error = xfs_itobp(mp, NULL, ip, &dip, - &ibp, 0, 0); + &ibp, 0, 0, + XFS_BUF_LOCK); ASSERT(error || (dip != NULL)); } diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 7eb157a59f9e..1c6d40ed6816 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -36,7 +36,6 @@ #include "xfs_bmap.h" #include "xfs_error.h" #include "xfs_quota.h" -#include "xfs_refcache.h" #include "xfs_utils.h" #include "xfs_trans_space.h" #include "xfs_vnodeops.h" @@ -580,10 +579,8 @@ xfs_rename( * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (target_ip != NULL) { - xfs_refcache_purge_ip(target_ip); + if (target_ip != NULL) IRELE(target_ip); - } /* * Let interposed file systems know about removed links. */ diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 60b6b898022b..4e5c010f5040 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -304,7 +304,8 @@ xfs_trans_read_buf( if (tp == NULL) { bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY); if (!bp) - return XFS_ERROR(ENOMEM); + return (flags & XFS_BUF_TRYLOCK) ? + EAGAIN : XFS_ERROR(ENOMEM); if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) { xfs_ioerror_alert("xfs_trans_read_buf", mp, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 413587f02155..2cdf40f64ccc 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -43,7 +43,6 @@ #include "xfs_error.h" #include "xfs_bmap.h" #include "xfs_rw.h" -#include "xfs_refcache.h" #include "xfs_buf_item.h" #include "xfs_log_priv.h" #include "xfs_dir2_trace.h" @@ -113,9 +112,6 @@ xfs_init(void) xfs_ili_zone = kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", KM_ZONE_SPREAD, NULL); - xfs_icluster_zone = - kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster", - KM_ZONE_SPREAD, NULL); /* * Allocate global trace buffers. @@ -153,11 +149,9 @@ xfs_cleanup(void) extern kmem_zone_t *xfs_inode_zone; extern kmem_zone_t *xfs_efd_zone; extern kmem_zone_t *xfs_efi_zone; - extern kmem_zone_t *xfs_icluster_zone; xfs_cleanup_procfs(); xfs_sysctl_unregister(); - xfs_refcache_destroy(); xfs_filestream_uninit(); xfs_mru_cache_uninit(); xfs_acl_zone_destroy(xfs_acl_zone); @@ -189,7 +183,6 @@ xfs_cleanup(void) kmem_zone_destroy(xfs_efi_zone); kmem_zone_destroy(xfs_ifork_zone); kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_icluster_zone); } /* @@ -585,11 +578,6 @@ xfs_unmount( 0 : DM_FLAGS_UNWANTED; } #endif - /* - * First blow any referenced inode from this file system - * out of the reference cache, and delete the timer. - */ - xfs_refcache_purge_mp(mp); /* * Blow away any referenced inode in the filestreams cache. @@ -653,7 +641,6 @@ xfs_quiesce_fs( { int count = 0, pincount; - xfs_refcache_purge_mp(mp); xfs_flush_buftarg(mp->m_ddev_targp, 0); xfs_finish_reclaim_all(mp, 0); @@ -1324,18 +1311,6 @@ xfs_syncsub( } /* - * If this is the periodic sync, then kick some entries out of - * the reference cache. This ensures that idle entries are - * eventually kicked out of the cache. - */ - if (flags & SYNC_REFCACHE) { - if (flags & SYNC_WAIT) - xfs_refcache_purge_mp(mp); - else - xfs_refcache_purge_some(mp); - } - - /* * If asked, update the disk superblock with incore counter values if we * are using non-persistent counters so that they don't get too far out * of sync if we crash or get a forced shutdown. We don't want to force diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 51305242ff8c..e9d2feb842ed 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -48,7 +48,6 @@ #include "xfs_quota.h" #include "xfs_utils.h" #include "xfs_rtalloc.h" -#include "xfs_refcache.h" #include "xfs_trans_space.h" #include "xfs_log_priv.h" #include "xfs_filestream.h" @@ -1520,12 +1519,6 @@ xfs_release( xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); } -#ifdef HAVE_REFCACHE - /* If we are in the NFS reference cache then don't do this now */ - if (ip->i_refcache) - return 0; -#endif - if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || @@ -2449,14 +2442,6 @@ xfs_remove( } /* - * Before we drop our extra reference to the inode, purge it - * from the refcache if it is there. By waiting until afterwards - * to do the IRELE, we ensure that we won't go inactive in the - * xfs_refcache_purge_ip routine (although that would be OK). - */ - xfs_refcache_purge_ip(ip); - - /* * If we are using filestreams, kill the stream association. * If the file is still open it may get a new one but that * will get killed on last close in xfs_close() so we don't @@ -2495,14 +2480,6 @@ xfs_remove( cancel_flags |= XFS_TRANS_ABORT; xfs_trans_cancel(tp, cancel_flags); - /* - * Before we drop our extra reference to the inode, purge it - * from the refcache if it is there. By waiting until afterwards - * to do the IRELE, we ensure that we won't go inactive in the - * xfs_refcache_purge_ip routine (although that would be OK). - */ - xfs_refcache_purge_ip(ip); - IRELE(ip); goto std_return; @@ -3461,14 +3438,7 @@ xfs_rwunlock( if (S_ISDIR(ip->i_d.di_mode)) return; if (locktype == VRWLOCK_WRITE) { - /* - * In the write case, we may have added a new entry to - * the reference cache. This might store a pointer to - * an inode to be released in this inode. If it is there, - * clear the pointer and release the inode after unlocking - * this one. - */ - xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); } else { ASSERT((locktype == VRWLOCK_READ) || (locktype == VRWLOCK_WRITE_DIRECT)); @@ -3484,7 +3454,6 @@ xfs_inode_flush( int flags) { xfs_mount_t *mp = ip->i_mount; - xfs_inode_log_item_t *iip = ip->i_itemp; int error = 0; if (XFS_FORCED_SHUTDOWN(mp)) @@ -3494,33 +3463,9 @@ xfs_inode_flush( * Bypass inodes which have already been cleaned by * the inode flush clustering code inside xfs_iflush */ - if ((ip->i_update_core == 0) && - ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) + if (xfs_inode_clean(ip)) return 0; - if (flags & FLUSH_LOG) { - if (iip && iip->ili_last_lsn) { - xlog_t *log = mp->m_log; - xfs_lsn_t sync_lsn; - int log_flags = XFS_LOG_FORCE; - - spin_lock(&log->l_grant_lock); - sync_lsn = log->l_last_sync_lsn; - spin_unlock(&log->l_grant_lock); - - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { - if (flags & FLUSH_SYNC) - log_flags |= XFS_LOG_SYNC; - error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); - if (error) - return error; - } - - if (ip->i_update_core == 0) - return 0; - } - } - /* * We make this non-blocking if the inode is contended, * return EAGAIN to indicate to the caller that they @@ -3528,30 +3473,22 @@ xfs_inode_flush( * blocking on inodes inside another operation right * now, they get caught later by xfs_sync. */ - if (flags & FLUSH_INODE) { - int flush_flags; - - if (flags & FLUSH_SYNC) { - xfs_ilock(ip, XFS_ILOCK_SHARED); - xfs_iflock(ip); - } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return EAGAIN; - } - } else { + if (flags & FLUSH_SYNC) { + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_iflock(ip); + } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_SHARED); return EAGAIN; } - - if (flags & FLUSH_SYNC) - flush_flags = XFS_IFLUSH_SYNC; - else - flush_flags = XFS_IFLUSH_ASYNC; - - error = xfs_iflush(ip, flush_flags); - xfs_iunlock(ip, XFS_ILOCK_SHARED); + } else { + return EAGAIN; } + error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC + : XFS_IFLUSH_ASYNC_NOBLOCK); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return error; } @@ -3694,12 +3631,12 @@ xfs_finish_reclaim( * We get the flush lock regardless, though, just to make sure * we don't free it while it is being flushed. */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - if (!locked) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_iflock(ip); - } + if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); + } + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { if (ip->i_update_core || ((ip->i_itemp != NULL) && (ip->i_itemp->ili_format.ilf_fields != 0))) { @@ -3719,17 +3656,11 @@ xfs_finish_reclaim( ASSERT(ip->i_update_core == 0); ASSERT(ip->i_itemp == NULL || ip->i_itemp->ili_format.ilf_fields == 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else if (locked) { - /* - * We are not interested in doing an iflush if we're - * in the process of shutting down the filesystem forcibly. - * So, just reclaim the inode. - */ - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); } + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + reclaim: xfs_ireclaim(ip); return 0; |