summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2008-02-22 13:33:44 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2008-02-22 13:33:44 +1100
commit90c6fd184c8e56091e08119b97fdde1c0d133a44 (patch)
tree52700522c82540e8d2c883a0c505b55840a5cb12 /fs
parent867945496e8ad0cfe4e85aa1a00d1377efaeeafd (diff)
parent5e9ff1b89bd0c6ede15f7d9cd0818db07eb84d7f (diff)
Merge commit 'xfs/master'
Conflicts: fs/xfs/Makefile-linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h5
-rw-r--r--fs/xfs/xfs_bmap.c4
-rw-r--r--fs/xfs/xfs_iget.c49
-rw-r--r--fs/xfs/xfs_inode.c655
-rw-r--r--fs/xfs/xfs_inode.h23
-rw-r--r--fs/xfs/xfs_inode_item.h8
-rw-r--r--fs/xfs/xfs_itable.c3
-rw-r--r--fs/xfs/xfs_log.c47
-rw-r--r--fs/xfs/xfs_log_priv.h2
-rw-r--r--fs/xfs/xfs_log_recover.c3
-rw-r--r--fs/xfs/xfs_rename.c5
-rw-r--r--fs/xfs/xfs_trans_buf.c3
-rw-r--r--fs/xfs/xfs_vfsops.c25
-rw-r--r--fs/xfs/xfs_vnodeops.c113
16 files changed, 402 insertions, 547 deletions
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 3ca39c4e5d2a..e5143323e71f 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -99,7 +99,6 @@
/*
* Feature macros (disable/enable)
*/
-#undef HAVE_REFCACHE /* reference cache not needed for NFS in 2.6 */
#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
#ifdef CONFIG_SMP
#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 21dfc9da235e..df24c6fe4ce4 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -896,7 +896,8 @@ xfs_fs_write_inode(
struct inode *inode,
int sync)
{
- int error = 0, flags = FLUSH_INODE;
+ int error = 0;
+ int flags = 0;
xfs_itrace_entry(XFS_I(inode));
if (sync) {
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index b5ea418693b1..f200e0244082 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -73,12 +73,9 @@ typedef enum bhv_vrwlock {
#define IO_INVIS 0x00020 /* don't update inode timestamps */
/*
- * Flags for vop_iflush call
+ * Flags for xfs_inode_flush
*/
#define FLUSH_SYNC 1 /* wait for flush to complete */
-#define FLUSH_INODE 2 /* flush the inode itself */
-#define FLUSH_LOG 4 /* force the last log entry for
- * this inode out to disk */
/*
* Flush/Invalidate options for vop_toss/flush/flushinval_pages.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1c0a5a585a82..759b75b90b59 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5869,6 +5869,10 @@ xfs_getbmap(
/* xfs_fsize_t last_byte = xfs_file_last_byte(ip); */
error = xfs_flush_pages(ip, (xfs_off_t)0,
-1, 0, FI_REMAPF);
+ if (error) {
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return error;
+ }
}
ASSERT(whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f01b07687faf..a959e3336931 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -78,7 +78,6 @@ xfs_iget_core(
xfs_inode_t *ip;
xfs_inode_t *iq;
int error;
- xfs_icluster_t *icl, *new_icl = NULL;
unsigned long first_index, mask;
xfs_perag_t *pag;
xfs_agino_t agino;
@@ -229,11 +228,9 @@ finish_inode:
}
/*
- * This is a bit messy - we preallocate everything we _might_
- * need before we pick up the ici lock. That way we don't have to
- * juggle locks and go all the way back to the start.
+ * Preload the radix tree so we can insert safely under the
+ * write spinlock.
*/
- new_icl = kmem_zone_alloc(xfs_icluster_zone, KM_SLEEP);
if (radix_tree_preload(GFP_KERNEL)) {
delay(1);
goto again;
@@ -241,17 +238,6 @@ finish_inode:
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = agino & mask;
write_lock(&pag->pag_ici_lock);
-
- /*
- * Find the cluster if it exists
- */
- icl = NULL;
- if (radix_tree_gang_lookup(&pag->pag_ici_root, (void**)&iq,
- first_index, 1)) {
- if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) == first_index)
- icl = iq->i_cluster;
- }
-
/*
* insert the new inode
*/
@@ -266,30 +252,13 @@ finish_inode:
}
/*
- * These values _must_ be set before releasing ihlock!
+ * These values _must_ be set before releasing the radix tree lock!
*/
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
- ASSERT(ip->i_cluster == NULL);
-
- if (!icl) {
- spin_lock_init(&new_icl->icl_lock);
- INIT_HLIST_HEAD(&new_icl->icl_inodes);
- icl = new_icl;
- new_icl = NULL;
- } else {
- ASSERT(!hlist_empty(&icl->icl_inodes));
- }
- spin_lock(&icl->icl_lock);
- hlist_add_head(&ip->i_cnode, &icl->icl_inodes);
- ip->i_cluster = icl;
- spin_unlock(&icl->icl_lock);
-
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
- if (new_icl)
- kmem_zone_free(xfs_icluster_zone, new_icl);
/*
* Link ip to its mount and thread it on the mount's inode list.
@@ -528,18 +497,6 @@ xfs_iextract(
xfs_put_perag(mp, pag);
/*
- * Remove from cluster list
- */
- mp = ip->i_mount;
- spin_lock(&ip->i_cluster->icl_lock);
- hlist_del(&ip->i_cnode);
- spin_unlock(&ip->i_cluster->icl_lock);
-
- /* was last inode in cluster? */
- if (hlist_empty(&ip->i_cluster->icl_inodes))
- kmem_zone_free(xfs_icluster_zone, ip->i_cluster);
-
- /*
* Remove from mount's inode list.
*/
XFS_MOUNT_ILOCK(mp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a550546a7083..4e23a9bd5106 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,7 +55,6 @@
kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
-kmem_zone_t *xfs_icluster_zone;
/*
* Used in xfs_itruncate(). This is the maximum number of extents
@@ -126,6 +125,90 @@ xfs_inobp_check(
#endif
/*
+ * Find the buffer associated with the given inode map.
+ * We do basic validation checks on the buffer once it has been
+ * retrieved from disk.
+ */
+STATIC int
+xfs_imap_to_bp(
+ xfs_mount_t *mp,
+ xfs_trans_t *tp,
+ xfs_imap_t *imap,
+ xfs_buf_t **bpp,
+ uint buf_flags,
+ uint imap_flags)
+{
+ int error;
+ int i;
+ int ni;
+ xfs_buf_t *bp;
+
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
+ (int)imap->im_len, buf_flags, &bp);
+ if (error) {
+ if (error != EAGAIN) {
+ cmn_err(CE_WARN,
+ "xfs_imap_to_bp: xfs_trans_read_buf()returned "
+ "an error %d on %s. Returning error.",
+ error, mp->m_fsname);
+ } else {
+ ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+ }
+ return error;
+ }
+
+ /*
+ * Validate the magic number and version of every inode in the buffer
+ * (if DEBUG kernel) or the first inode in the buffer, otherwise.
+ */
+#ifdef DEBUG
+ ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog;
+#else /* usual case */
+ ni = 1;
+#endif
+
+ for (i = 0; i < ni; i++) {
+ int di_ok;
+ xfs_dinode_t *dip;
+
+ dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+ (i << mp->m_sb.sb_inodelog));
+ di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
+ XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
+ if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
+ XFS_ERRTAG_ITOBP_INOTOBP,
+ XFS_RANDOM_ITOBP_INOTOBP))) {
+ if (imap_flags & XFS_IMAP_BULKSTAT) {
+ xfs_trans_brelse(tp, bp);
+ return XFS_ERROR(EINVAL);
+ }
+ XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
+ XFS_ERRLEVEL_HIGH, mp, dip);
+#ifdef DEBUG
+ cmn_err(CE_PANIC,
+ "Device %s - bad inode magic/vsn "
+ "daddr %lld #%d (magic=%x)",
+ XFS_BUFTARG_NAME(mp->m_ddev_targp),
+ (unsigned long long)imap->im_blkno, i,
+ be16_to_cpu(dip->di_core.di_magic));
+#endif
+ xfs_trans_brelse(tp, bp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+ }
+
+ xfs_inobp_check(mp, bp);
+
+ /*
+ * Mark the buffer as an inode buffer now that it looks good
+ */
+ XFS_BUF_SET_VTYPE(bp, B_FS_INO);
+
+ *bpp = bp;
+ return 0;
+}
+
+/*
* This routine is called to map an inode number within a file
* system to the buffer containing the on-disk version of the
* inode. It returns a pointer to the buffer containing the
@@ -147,72 +230,19 @@ xfs_inotobp(
xfs_buf_t **bpp,
int *offset)
{
- int di_ok;
xfs_imap_t imap;
xfs_buf_t *bp;
int error;
- xfs_dinode_t *dip;
- /*
- * Call the space management code to find the location of the
- * inode on disk.
- */
imap.im_blkno = 0;
error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP);
- if (error != 0) {
- cmn_err(CE_WARN,
- "xfs_inotobp: xfs_imap() returned an "
- "error %d on %s. Returning error.", error, mp->m_fsname);
+ if (error)
return error;
- }
-
- /*
- * If the inode number maps to a block outside the bounds of the
- * file system then return NULL rather than calling read_buf
- * and panicing when we get an error from the driver.
- */
- if ((imap.im_blkno + imap.im_len) >
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
- cmn_err(CE_WARN,
- "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds "
- "of the file system %s. Returning EINVAL.",
- (unsigned long long)imap.im_blkno,
- imap.im_len, mp->m_fsname);
- return XFS_ERROR(EINVAL);
- }
- /*
- * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will
- * default to just a read_buf() call.
- */
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
- (int)imap.im_len, XFS_BUF_LOCK, &bp);
-
- if (error) {
- cmn_err(CE_WARN,
- "xfs_inotobp: xfs_trans_read_buf() returned an "
- "error %d on %s. Returning error.", error, mp->m_fsname);
+ error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0);
+ if (error)
return error;
- }
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0);
- di_ok =
- be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
- XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
- if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP,
- XFS_RANDOM_ITOBP_INOTOBP))) {
- XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip);
- xfs_trans_brelse(tp, bp);
- cmn_err(CE_WARN,
- "xfs_inotobp: XFS_TEST_ERROR() returned an "
- "error on %s. Returning EFSCORRUPTED.", mp->m_fsname);
- return XFS_ERROR(EFSCORRUPTED);
- }
-
- xfs_inobp_check(mp, bp);
- /*
- * Set *dipp to point to the on-disk inode in the buffer.
- */
*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
*bpp = bp;
*offset = imap.im_boffset;
@@ -248,46 +278,21 @@ xfs_itobp(
xfs_dinode_t **dipp,
xfs_buf_t **bpp,
xfs_daddr_t bno,
- uint imap_flags)
+ uint imap_flags,
+ uint buf_flags)
{
xfs_imap_t imap;
xfs_buf_t *bp;
int error;
- int i;
- int ni;
if (ip->i_blkno == (xfs_daddr_t)0) {
- /*
- * Call the space management code to find the location of the
- * inode on disk.
- */
imap.im_blkno = bno;
- if ((error = xfs_imap(mp, tp, ip->i_ino, &imap,
- XFS_IMAP_LOOKUP | imap_flags)))
+ error = xfs_imap(mp, tp, ip->i_ino, &imap,
+ XFS_IMAP_LOOKUP | imap_flags);
+ if (error)
return error;
/*
- * If the inode number maps to a block outside the bounds
- * of the file system then return NULL rather than calling
- * read_buf and panicing when we get an error from the
- * driver.
- */
- if ((imap.im_blkno + imap.im_len) >
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
- "(imap.im_blkno (0x%llx) "
- "+ imap.im_len (0x%llx)) > "
- " XFS_FSB_TO_BB(mp, "
- "mp->m_sb.sb_dblocks) (0x%llx)",
- (unsigned long long) imap.im_blkno,
- (unsigned long long) imap.im_len,
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
-#endif /* DEBUG */
- return XFS_ERROR(EINVAL);
- }
-
- /*
* Fill in the fields in the inode that will be used to
* map the inode to its buffer from now on.
*/
@@ -305,76 +310,17 @@ xfs_itobp(
}
ASSERT(bno == 0 || bno == imap.im_blkno);
- /*
- * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will
- * default to just a read_buf() call.
- */
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno,
- (int)imap.im_len, XFS_BUF_LOCK, &bp);
- if (error) {
-#ifdef DEBUG
- xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: "
- "xfs_trans_read_buf() returned error %d, "
- "imap.im_blkno 0x%llx, imap.im_len 0x%llx",
- error, (unsigned long long) imap.im_blkno,
- (unsigned long long) imap.im_len);
-#endif /* DEBUG */
+ error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags);
+ if (error)
return error;
- }
- /*
- * Validate the magic number and version of every inode in the buffer
- * (if DEBUG kernel) or the first inode in the buffer, otherwise.
- * No validation is done here in userspace (xfs_repair).
- */
-#if !defined(__KERNEL__)
- ni = 0;
-#elif defined(DEBUG)
- ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog;
-#else /* usual case */
- ni = 1;
-#endif
-
- for (i = 0; i < ni; i++) {
- int di_ok;
- xfs_dinode_t *dip;
-
- dip = (xfs_dinode_t *)xfs_buf_offset(bp,
- (i << mp->m_sb.sb_inodelog));
- di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC &&
- XFS_DINODE_GOOD_VERSION(dip->di_core.di_version);
- if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
- XFS_ERRTAG_ITOBP_INOTOBP,
- XFS_RANDOM_ITOBP_INOTOBP))) {
- if (imap_flags & XFS_IMAP_BULKSTAT) {
- xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EINVAL);
- }
-#ifdef DEBUG
- cmn_err(CE_ALERT,
- "Device %s - bad inode magic/vsn "
- "daddr %lld #%d (magic=%x)",
- XFS_BUFTARG_NAME(mp->m_ddev_targp),
- (unsigned long long)imap.im_blkno, i,
- be16_to_cpu(dip->di_core.di_magic));
-#endif
- XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH,
- mp, dip);
- xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EFSCORRUPTED);
- }
+ if (!bp) {
+ ASSERT(buf_flags & XFS_BUF_TRYLOCK);
+ ASSERT(tp == NULL);
+ *bpp = NULL;
+ return EAGAIN;
}
- xfs_inobp_check(mp, bp);
-
- /*
- * Mark the buffer as an inode buffer now that it looks good
- */
- XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
- /*
- * Set *dipp to point to the on-disk inode in the buffer.
- */
*dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
*bpp = bp;
return 0;
@@ -878,7 +824,7 @@ xfs_iread(
* return NULL as well. Set i_blkno to 0 so that xfs_itobp() will
* know that this is a new incore inode.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags);
+ error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK);
if (error) {
kmem_zone_free(xfs_inode_zone, ip);
return error;
@@ -1967,7 +1913,7 @@ xfs_iunlink(
* Here we put the head pointer into our next pointer,
* and then we fall through to point the head at us.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
if (error)
return error;
@@ -2075,7 +2021,7 @@ xfs_iunlink_remove(
* of dealing with the buffer when there is no need to
* change it.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
if (error) {
cmn_err(CE_WARN,
"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2137,7 +2083,7 @@ xfs_iunlink_remove(
* Now last_ibp points to the buffer previous to us on
* the unlinked list. Pull us from the list.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0);
+ error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
if (error) {
cmn_err(CE_WARN,
"xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.",
@@ -2172,13 +2118,6 @@ xfs_iunlink_remove(
return 0;
}
-STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
-{
- return (((ip->i_itemp == NULL) ||
- !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- (ip->i_update_core == 0));
-}
-
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
@@ -2400,7 +2339,7 @@ xfs_ifree(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0);
+ error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK);
if (error)
return error;
@@ -2678,14 +2617,31 @@ xfs_imap(
fsbno = imap->im_blkno ?
XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK;
error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags);
- if (error != 0) {
+ if (error)
return error;
- }
+
imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno);
imap->im_len = XFS_FSB_TO_BB(mp, len);
imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno);
imap->im_ioffset = (ushort)off;
imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog);
+
+ /*
+ * If the inode number maps to a block outside the bounds
+ * of the file system then return EINVAL rather than calling
+ * read_buf and panicking when we get an error from the
+ * driver.
+ */
+ if ((imap->im_blkno + imap->im_len) >
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
+ xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: "
+ "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > "
+ " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)",
+ (unsigned long long) imap->im_blkno,
+ (unsigned long long) imap->im_len,
+ XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
+ return EINVAL;
+ }
return 0;
}
@@ -2826,38 +2782,41 @@ xfs_iunpin(
}
/*
- * This is called to wait for the given inode to be unpinned.
- * It will sleep until this happens. The caller must have the
- * inode locked in at least shared mode so that the buffer cannot
- * be subsequently pinned once someone is waiting for it to be
- * unpinned.
+ * This is called to push the log so that an inode gets unpinned. It can either
+ * wait for the unpin or return immediately without waiting. The caller must
+ * have the inode locked in at least shared mode so that the buffer cannot be
+ * subsequently pinned once someone is waiting for it to be unpinned.
*/
STATIC void
-xfs_iunpin_wait(
- xfs_inode_t *ip)
+__xfs_iunpin_wait(
+ xfs_inode_t *ip,
+ int wait)
{
- xfs_inode_log_item_t *iip;
- xfs_lsn_t lsn;
+ xfs_inode_log_item_t *iip = ip->i_itemp;
ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
-
- if (atomic_read(&ip->i_pincount) == 0) {
+ if (atomic_read(&ip->i_pincount) == 0)
return;
- }
- iip = ip->i_itemp;
- if (iip && iip->ili_last_lsn) {
- lsn = iip->ili_last_lsn;
- } else {
- lsn = (xfs_lsn_t)0;
- }
+ /* Give the log a push to start the unpinning I/O */
+ xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ?
+ iip->ili_last_lsn : 0, XFS_LOG_FORCE);
+ if (wait)
+ wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+}
- /*
- * Give the log a push so we don't wait here too long.
- */
- xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE);
+static inline void
+xfs_iunpin_wait(
+ xfs_inode_t *ip)
+{
+ __xfs_iunpin_wait(ip, 1);
+}
- wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
+static inline void
+xfs_iunpin_nowait(
+ xfs_inode_t *ip)
+{
+ __xfs_iunpin_wait(ip, 0);
}
@@ -3027,6 +2986,145 @@ xfs_iflush_fork(
return 0;
}
+STATIC int
+xfs_iflush_cluster(
+ xfs_inode_t *ip,
+ xfs_buf_t *bp)
+{
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
+ unsigned long first_index, mask;
+ int ilist_size;
+ xfs_inode_t **ilist;
+ xfs_inode_t *iq;
+ int nr_found;
+ int clcount = 0;
+ int bufwasdelwri;
+ int i;
+
+ ASSERT(pag->pagi_inodeok);
+ ASSERT(pag->pag_ici_init);
+
+ ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *);
+ ilist = kmem_alloc(ilist_size, KM_MAYFAIL);
+ if (!ilist)
+ return 0;
+
+ mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
+ read_lock(&pag->pag_ici_lock);
+ /* really need a gang lookup range call here */
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+ first_index,
+ XFS_INODE_CLUSTER_SIZE(mp));
+ if (nr_found == 0)
+ goto out_free;
+
+ for (i = 0; i < nr_found; i++) {
+ iq = ilist[i];
+ if (iq == ip)
+ continue;
+ /* if the inode lies outside this cluster, we're done. */
+ if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index)
+ break;
+ /*
+ * Do an un-protected check to see if the inode is dirty and
+ * is a candidate for flushing. These checks will be repeated
+ * later after the appropriate locks are acquired.
+ */
+ if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+ continue;
+
+ /*
+ * Try to get locks. If any are unavailable or it is pinned,
+ * then this inode cannot be flushed and is skipped.
+ */
+
+ if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+ continue;
+ if (!xfs_iflock_nowait(iq)) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+ if (xfs_ipincount(iq)) {
+ xfs_ifunlock(iq);
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ continue;
+ }
+
+ /*
+ * arriving here means that this inode can be flushed. First
+ * re-check that it's dirty before flushing.
+ */
+ if (!xfs_inode_clean(iq)) {
+ int error;
+ error = xfs_iflush_int(iq, bp);
+ if (error) {
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ goto cluster_corrupt_out;
+ }
+ clcount++;
+ } else {
+ xfs_ifunlock(iq);
+ }
+ xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ }
+
+ if (clcount) {
+ XFS_STATS_INC(xs_icluster_flushcnt);
+ XFS_STATS_ADD(xs_icluster_flushinode, clcount);
+ }
+
+out_free:
+ read_unlock(&pag->pag_ici_lock);
+ kmem_free(ilist, ilist_size);
+ return 0;
+
+
+cluster_corrupt_out:
+ /*
+ * Corruption detected in the clustering loop. Invalidate the
+ * inode buffer and shut down the filesystem.
+ */
+ read_unlock(&pag->pag_ici_lock);
+ /*
+ * Clean up the buffer. If it was B_DELWRI, just release it --
+ * brelse can handle it with no problems. If not, shut down the
+ * filesystem before releasing the buffer.
+ */
+ bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp);
+ if (bufwasdelwri)
+ xfs_buf_relse(bp);
+
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+
+ if (!bufwasdelwri) {
+ /*
+ * Just like incore_relse: if we have b_iodone functions,
+ * mark the buffer as an error and call them. Otherwise
+ * mark it as stale and brelse.
+ */
+ if (XFS_BUF_IODONE_FUNC(bp)) {
+ XFS_BUF_CLR_BDSTRAT_FUNC(bp);
+ XFS_BUF_UNDONE(bp);
+ XFS_BUF_STALE(bp);
+ XFS_BUF_SHUT(bp);
+ XFS_BUF_ERROR(bp,EIO);
+ xfs_biodone(bp);
+ } else {
+ XFS_BUF_STALE(bp);
+ xfs_buf_relse(bp);
+ }
+ }
+
+ /*
+ * Unlocks the flush lock
+ */
+ xfs_iflush_abort(iq);
+ kmem_free(ilist, ilist_size);
+ return XFS_ERROR(EFSCORRUPTED);
+}
+
/*
* xfs_iflush() will write a modified inode's changes out to the
* inode's on disk home. The caller must have the inode lock held
@@ -3046,11 +3144,7 @@ xfs_iflush(
xfs_dinode_t *dip;
xfs_mount_t *mp;
int error;
- /* REFERENCED */
- xfs_inode_t *iq;
- int clcount; /* count of inodes clustered */
- int bufwasdelwri;
- struct hlist_node *entry;
+ int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };
XFS_STATS_INC(xs_iflush_count);
@@ -3067,8 +3161,7 @@ xfs_iflush(
* If the inode isn't dirty, then just release the inode
* flush lock and do nothing.
*/
- if ((ip->i_update_core == 0) &&
- ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+ if (xfs_inode_clean(ip)) {
ASSERT((iip != NULL) ?
!(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
xfs_ifunlock(ip);
@@ -3076,11 +3169,21 @@ xfs_iflush(
}
/*
- * We can't flush the inode until it is unpinned, so
- * wait for it. We know noone new can pin it, because
- * we are holding the inode lock shared and you need
- * to hold it exclusively to pin the inode.
+ * We can't flush the inode until it is unpinned, so wait for it if we
+ * are allowed to block. We know no one new can pin it, because we are
+ * holding the inode lock shared and you need to hold it exclusively to
+ * pin the inode.
+ *
+ * If we are not allowed to block, force the log out asynchronously so
+ * that when we come back the inode will be unpinned. If other inodes
+ * in the same cluster are dirty, they will probably write the inode
+ * out for us if they occur after the log force completes.
*/
+ if (noblock && xfs_ipincount(ip)) {
+ xfs_iunpin_nowait(ip);
+ xfs_ifunlock(ip);
+ return EAGAIN;
+ }
xfs_iunpin_wait(ip);
/*
@@ -3097,15 +3200,6 @@ xfs_iflush(
}
/*
- * Get the buffer containing the on-disk inode.
- */
- error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0);
- if (error) {
- xfs_ifunlock(ip);
- return error;
- }
-
- /*
* Decide how buffer will be flushed out. This is done before
* the call to xfs_iflush_int because this field is zeroed by it.
*/
@@ -3121,6 +3215,7 @@ xfs_iflush(
case XFS_IFLUSH_DELWRI_ELSE_SYNC:
flags = 0;
break;
+ case XFS_IFLUSH_ASYNC_NOBLOCK:
case XFS_IFLUSH_ASYNC:
case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
flags = INT_ASYNC;
@@ -3140,6 +3235,7 @@ xfs_iflush(
case XFS_IFLUSH_DELWRI:
flags = INT_DELWRI;
break;
+ case XFS_IFLUSH_ASYNC_NOBLOCK:
case XFS_IFLUSH_ASYNC:
flags = INT_ASYNC;
break;
@@ -3154,89 +3250,36 @@ xfs_iflush(
}
/*
- * First flush out the inode that xfs_iflush was called with.
+ * Get the buffer containing the on-disk inode.
*/
- error = xfs_iflush_int(ip, bp);
- if (error) {
- goto corrupt_out;
+ error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0,
+ noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK);
+ if (error || !bp) {
+ xfs_ifunlock(ip);
+ return error;
}
/*
- * inode clustering:
- * see if other inodes can be gathered into this write
+ * First flush out the inode that xfs_iflush was called with.
*/
- spin_lock(&ip->i_cluster->icl_lock);
- ip->i_cluster->icl_buf = bp;
-
- clcount = 0;
- hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) {
- if (iq == ip)
- continue;
-
- /*
- * Do an un-protected check to see if the inode is dirty and
- * is a candidate for flushing. These checks will be repeated
- * later after the appropriate locks are acquired.
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core == 0) &&
- ((iip == NULL) ||
- !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
- xfs_ipincount(iq) == 0) {
- continue;
- }
-
- /*
- * Try to get locks. If any are unavailable,
- * then this inode cannot be flushed and is skipped.
- */
-
- /* get inode locks (just i_lock) */
- if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) {
- /* get inode flush lock */
- if (xfs_iflock_nowait(iq)) {
- /* check if pinned */
- if (xfs_ipincount(iq) == 0) {
- /* arriving here means that
- * this inode can be flushed.
- * first re-check that it's
- * dirty
- */
- iip = iq->i_itemp;
- if ((iq->i_update_core != 0)||
- ((iip != NULL) &&
- (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
- clcount++;
- error = xfs_iflush_int(iq, bp);
- if (error) {
- xfs_iunlock(iq,
- XFS_ILOCK_SHARED);
- goto cluster_corrupt_out;
- }
- } else {
- xfs_ifunlock(iq);
- }
- } else {
- xfs_ifunlock(iq);
- }
- }
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
- }
- }
- spin_unlock(&ip->i_cluster->icl_lock);
-
- if (clcount) {
- XFS_STATS_INC(xs_icluster_flushcnt);
- XFS_STATS_ADD(xs_icluster_flushinode, clcount);
- }
+ error = xfs_iflush_int(ip, bp);
+ if (error)
+ goto corrupt_out;
/*
- * If the buffer is pinned then push on the log so we won't
+ * If the buffer is pinned then push on the log now so we won't
* get stuck waiting in the write for too long.
*/
- if (XFS_BUF_ISPINNED(bp)){
+ if (XFS_BUF_ISPINNED(bp))
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- }
+
+ /*
+ * inode clustering:
+ * see if other inodes can be gathered into this write
+ */
+ error = xfs_iflush_cluster(ip, bp);
+ if (error)
+ goto cluster_corrupt_out;
if (flags & INT_DELWRI) {
xfs_bdwrite(mp, bp);
@@ -3250,52 +3293,11 @@ xfs_iflush(
corrupt_out:
xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- xfs_iflush_abort(ip);
- /*
- * Unlocks the flush lock
- */
- return XFS_ERROR(EFSCORRUPTED);
-
cluster_corrupt_out:
- /* Corruption detected in the clustering loop. Invalidate the
- * inode buffer and shut down the filesystem.
- */
- spin_unlock(&ip->i_cluster->icl_lock);
-
- /*
- * Clean up the buffer. If it was B_DELWRI, just release it --
- * brelse can handle it with no problems. If not, shut down the
- * filesystem before releasing the buffer.
- */
- if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) {
- xfs_buf_relse(bp);
- }
-
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-
- if(!bufwasdelwri) {
- /*
- * Just like incore_relse: if we have b_iodone functions,
- * mark the buffer as an error and call them. Otherwise
- * mark it as stale and brelse.
- */
- if (XFS_BUF_IODONE_FUNC(bp)) {
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
- XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_SHUT(bp);
- XFS_BUF_ERROR(bp,EIO);
- xfs_biodone(bp);
- } else {
- XFS_BUF_STALE(bp);
- xfs_buf_relse(bp);
- }
- }
-
- xfs_iflush_abort(iq);
/*
* Unlocks the flush lock
*/
+ xfs_iflush_abort(ip);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -3325,8 +3327,7 @@ xfs_iflush_int(
* If the inode isn't dirty, then just release the inode
* flush lock and do nothing.
*/
- if ((ip->i_update_core == 0) &&
- ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) {
+ if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
return 0;
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index bfcd72cbaeea..93c37697a72c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -133,19 +133,6 @@ typedef struct dm_attrs_s {
} dm_attrs_t;
/*
- * This is the xfs inode cluster structure. This structure is used by
- * xfs_iflush to find inodes that share a cluster and can be flushed to disk at
- * the same time.
- */
-typedef struct xfs_icluster {
- struct hlist_head icl_inodes; /* list of inodes on cluster */
- xfs_daddr_t icl_blkno; /* starting block number of
- * the cluster */
- struct xfs_buf *icl_buf; /* the inode buffer */
- spinlock_t icl_lock; /* inode list lock */
-} xfs_icluster_t;
-
-/*
* This is the xfs in-core inode structure.
* Most of the on-disk inode is embedded in the i_d field.
*
@@ -240,10 +227,6 @@ typedef struct xfs_inode {
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
-#ifdef HAVE_REFCACHE
- struct xfs_inode **i_refcache; /* ptr to entry in ref cache */
- struct xfs_inode *i_release; /* inode to unref */
-#endif
/* Miscellaneous state. */
unsigned short i_flags; /* see defined flags below */
unsigned char i_update_core; /* timestamps/size is dirty */
@@ -252,8 +235,6 @@ typedef struct xfs_inode {
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
- xfs_icluster_t *i_cluster; /* cluster list header */
- struct hlist_node i_cnode; /* cluster link node */
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
@@ -461,6 +442,7 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
#define XFS_IFLUSH_SYNC 3
#define XFS_IFLUSH_ASYNC 4
#define XFS_IFLUSH_DELWRI 5
+#define XFS_IFLUSH_ASYNC_NOBLOCK 6
/*
* Flags for xfs_itruncate_start().
@@ -515,7 +497,7 @@ int xfs_finish_reclaim_all(struct xfs_mount *, int);
*/
int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
xfs_inode_t *, struct xfs_dinode **, struct xfs_buf **,
- xfs_daddr_t, uint);
+ xfs_daddr_t, uint, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t,
xfs_inode_t **, xfs_daddr_t, uint);
int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int);
@@ -597,7 +579,6 @@ void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#define xfs_inobp_check(mp, bp)
#endif /* DEBUG */
-extern struct kmem_zone *xfs_icluster_zone;
extern struct kmem_zone *xfs_ifork_zone;
extern struct kmem_zone *xfs_inode_zone;
extern struct kmem_zone *xfs_ili_zone;
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index bfe92ea17952..40513077ab36 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -168,6 +168,14 @@ static inline int xfs_ilog_fext(int w)
return (w == XFS_DATA_FORK ? XFS_ILOG_DEXT : XFS_ILOG_AEXT);
}
+static inline int xfs_inode_clean(xfs_inode_t *ip)
+{
+ return (!ip->i_itemp ||
+ !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
+ !ip->i_update_core;
+}
+
+
#ifdef __KERNEL__
extern void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 658aab6b1bbf..38390e7381de 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -614,7 +614,8 @@ xfs_bulkstat(
xfs_buf_relse(bp);
error = xfs_itobp(mp, NULL, ip,
&dip, &bp, bno,
- XFS_IMAP_BULKSTAT);
+ XFS_IMAP_BULKSTAT,
+ XFS_BUF_LOCK);
if (!error)
clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a75edca1860f..6439c89826dc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- iclog->ic_refcnt++;
+ atomic_inc(&iclog->ic_refcnt);
spin_unlock(&log->l_icloglock);
xlog_state_want_sync(log, iclog);
(void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
*/
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- iclog->ic_refcnt++;
+ atomic_inc(&iclog->ic_refcnt);
spin_unlock(&log->l_icloglock);
xlog_state_want_sync(log, iclog);
@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t *log,
int v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
XFS_STATS_INC(xs_log_writes);
- ASSERT(iclog->ic_refcnt == 0);
+ ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/* Add for LR header */
count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2309,7 +2309,7 @@ xlog_state_done_syncing(
ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_IOERROR);
- ASSERT(iclog->ic_refcnt == 0);
+ ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
@@ -2391,7 +2391,7 @@ restart:
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
head = &iclog->ic_header;
- iclog->ic_refcnt++; /* prevents sync */
+ atomic_inc(&iclog->ic_refcnt); /* prevents sync */
log_offset = iclog->ic_offset;
/* On the 1st write to an iclog, figure out lsn. This works
@@ -2423,12 +2423,12 @@ restart:
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
/* If I'm the only one writing to this iclog, sync it to disk */
- if (iclog->ic_refcnt == 1) {
+ if (atomic_read(&iclog->ic_refcnt) == 1) {
spin_unlock(&log->l_icloglock);
if ((error = xlog_state_release_iclog(log, iclog)))
return error;
} else {
- iclog->ic_refcnt--;
+ atomic_dec(&iclog->ic_refcnt);
spin_unlock(&log->l_icloglock);
}
goto restart;
@@ -2813,33 +2813,35 @@ xlog_state_put_ticket(xlog_t *log,
*
*/
STATIC int
-xlog_state_release_iclog(xlog_t *log,
- xlog_in_core_t *iclog)
+xlog_state_release_iclog(
+ xlog_t *log,
+ xlog_in_core_t *iclog)
{
int sync = 0; /* do we sync? */
- xlog_assign_tail_lsn(log->l_mp);
+ if (iclog->ic_state & XLOG_STATE_IOERROR)
+ return XFS_ERROR(EIO);
- spin_lock(&log->l_icloglock);
+ ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+ if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+ return 0;
if (iclog->ic_state & XLOG_STATE_IOERROR) {
spin_unlock(&log->l_icloglock);
return XFS_ERROR(EIO);
}
-
- ASSERT(iclog->ic_refcnt > 0);
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_WANT_SYNC);
- if (--iclog->ic_refcnt == 0 &&
- iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ /* update tail before writing to iclog */
+ xlog_assign_tail_lsn(log->l_mp);
sync++;
iclog->ic_state = XLOG_STATE_SYNCING;
iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
/* cycle incremented when incrementing curr_block */
}
-
spin_unlock(&log->l_icloglock);
/*
@@ -2849,11 +2851,9 @@ xlog_state_release_iclog(xlog_t *log,
* this iclog has consistent data, so we ignore IOERROR
* flags after this point.
*/
- if (sync) {
+ if (sync)
return xlog_sync(log, iclog);
- }
return 0;
-
} /* xlog_state_release_iclog */
@@ -2953,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
* previous iclog and go to sleep.
*/
if (iclog->ic_state == XLOG_STATE_DIRTY ||
- (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+ (atomic_read(&iclog->ic_refcnt) == 0
+ && iclog->ic_offset == 0)) {
iclog = iclog->ic_prev;
if (iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
else
goto maybe_sleep;
} else {
- if (iclog->ic_refcnt == 0) {
+ if (atomic_read(&iclog->ic_refcnt) == 0) {
/* We are the only one with access to this
* iclog. Flush it out now. There should
* be a roundoff of zero to show that someone
* has already taken care of the roundoff from
* the previous sync.
*/
- iclog->ic_refcnt++;
+ atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
@@ -3100,7 +3101,7 @@ try_again:
already_slept = 1;
goto try_again;
} else {
- iclog->ic_refcnt++;
+ atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index e008233ee249..8662ce245c1f 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
#endif
int ic_size;
int ic_offset;
- int ic_refcnt;
+ atomic_t ic_refcnt;
int ic_bwritecnt;
ushort_t ic_state;
char *ic_datap; /* pointer to iclog data */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index b82d5d4d2462..d8a6d3089b16 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3214,7 +3214,8 @@ xlog_recover_process_iunlinks(
* next inode in the bucket.
*/
error = xfs_itobp(mp, NULL, ip, &dip,
- &ibp, 0, 0);
+ &ibp, 0, 0,
+ XFS_BUF_LOCK);
ASSERT(error || (dip != NULL));
}
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 7eb157a59f9e..1c6d40ed6816 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -36,7 +36,6 @@
#include "xfs_bmap.h"
#include "xfs_error.h"
#include "xfs_quota.h"
-#include "xfs_refcache.h"
#include "xfs_utils.h"
#include "xfs_trans_space.h"
#include "xfs_vnodeops.h"
@@ -580,10 +579,8 @@ xfs_rename(
* the vnode references.
*/
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (target_ip != NULL) {
- xfs_refcache_purge_ip(target_ip);
+ if (target_ip != NULL)
IRELE(target_ip);
- }
/*
* Let interposed file systems know about removed links.
*/
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b898022b..4e5c010f5040 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -304,7 +304,8 @@ xfs_trans_read_buf(
if (tp == NULL) {
bp = xfs_buf_read_flags(target, blkno, len, flags | BUF_BUSY);
if (!bp)
- return XFS_ERROR(ENOMEM);
+ return (flags & XFS_BUF_TRYLOCK) ?
+ EAGAIN : XFS_ERROR(ENOMEM);
if ((bp != NULL) && (XFS_BUF_GETERROR(bp) != 0)) {
xfs_ioerror_alert("xfs_trans_read_buf", mp,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 413587f02155..2cdf40f64ccc 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -43,7 +43,6 @@
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
-#include "xfs_refcache.h"
#include "xfs_buf_item.h"
#include "xfs_log_priv.h"
#include "xfs_dir2_trace.h"
@@ -113,9 +112,6 @@ xfs_init(void)
xfs_ili_zone =
kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
KM_ZONE_SPREAD, NULL);
- xfs_icluster_zone =
- kmem_zone_init_flags(sizeof(xfs_icluster_t), "xfs_icluster",
- KM_ZONE_SPREAD, NULL);
/*
* Allocate global trace buffers.
@@ -153,11 +149,9 @@ xfs_cleanup(void)
extern kmem_zone_t *xfs_inode_zone;
extern kmem_zone_t *xfs_efd_zone;
extern kmem_zone_t *xfs_efi_zone;
- extern kmem_zone_t *xfs_icluster_zone;
xfs_cleanup_procfs();
xfs_sysctl_unregister();
- xfs_refcache_destroy();
xfs_filestream_uninit();
xfs_mru_cache_uninit();
xfs_acl_zone_destroy(xfs_acl_zone);
@@ -189,7 +183,6 @@ xfs_cleanup(void)
kmem_zone_destroy(xfs_efi_zone);
kmem_zone_destroy(xfs_ifork_zone);
kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_icluster_zone);
}
/*
@@ -585,11 +578,6 @@ xfs_unmount(
0 : DM_FLAGS_UNWANTED;
}
#endif
- /*
- * First blow any referenced inode from this file system
- * out of the reference cache, and delete the timer.
- */
- xfs_refcache_purge_mp(mp);
/*
* Blow away any referenced inode in the filestreams cache.
@@ -653,7 +641,6 @@ xfs_quiesce_fs(
{
int count = 0, pincount;
- xfs_refcache_purge_mp(mp);
xfs_flush_buftarg(mp->m_ddev_targp, 0);
xfs_finish_reclaim_all(mp, 0);
@@ -1324,18 +1311,6 @@ xfs_syncsub(
}
/*
- * If this is the periodic sync, then kick some entries out of
- * the reference cache. This ensures that idle entries are
- * eventually kicked out of the cache.
- */
- if (flags & SYNC_REFCACHE) {
- if (flags & SYNC_WAIT)
- xfs_refcache_purge_mp(mp);
- else
- xfs_refcache_purge_some(mp);
- }
-
- /*
* If asked, update the disk superblock with incore counter values if we
* are using non-persistent counters so that they don't get too far out
* of sync if we crash or get a forced shutdown. We don't want to force
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 51305242ff8c..e9d2feb842ed 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -48,7 +48,6 @@
#include "xfs_quota.h"
#include "xfs_utils.h"
#include "xfs_rtalloc.h"
-#include "xfs_refcache.h"
#include "xfs_trans_space.h"
#include "xfs_log_priv.h"
#include "xfs_filestream.h"
@@ -1520,12 +1519,6 @@ xfs_release(
xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE);
}
-#ifdef HAVE_REFCACHE
- /* If we are in the NFS reference cache then don't do this now */
- if (ip->i_refcache)
- return 0;
-#endif
-
if (ip->i_d.di_nlink != 0) {
if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
((ip->i_size > 0) || (VN_CACHED(vp) > 0 ||
@@ -2449,14 +2442,6 @@ xfs_remove(
}
/*
- * Before we drop our extra reference to the inode, purge it
- * from the refcache if it is there. By waiting until afterwards
- * to do the IRELE, we ensure that we won't go inactive in the
- * xfs_refcache_purge_ip routine (although that would be OK).
- */
- xfs_refcache_purge_ip(ip);
-
- /*
* If we are using filestreams, kill the stream association.
* If the file is still open it may get a new one but that
* will get killed on last close in xfs_close() so we don't
@@ -2495,14 +2480,6 @@ xfs_remove(
cancel_flags |= XFS_TRANS_ABORT;
xfs_trans_cancel(tp, cancel_flags);
- /*
- * Before we drop our extra reference to the inode, purge it
- * from the refcache if it is there. By waiting until afterwards
- * to do the IRELE, we ensure that we won't go inactive in the
- * xfs_refcache_purge_ip routine (although that would be OK).
- */
- xfs_refcache_purge_ip(ip);
-
IRELE(ip);
goto std_return;
@@ -3461,14 +3438,7 @@ xfs_rwunlock(
if (S_ISDIR(ip->i_d.di_mode))
return;
if (locktype == VRWLOCK_WRITE) {
- /*
- * In the write case, we may have added a new entry to
- * the reference cache. This might store a pointer to
- * an inode to be released in this inode. If it is there,
- * clear the pointer and release the inode after unlocking
- * this one.
- */
- xfs_refcache_iunlock(ip, XFS_IOLOCK_EXCL);
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
} else {
ASSERT((locktype == VRWLOCK_READ) ||
(locktype == VRWLOCK_WRITE_DIRECT));
@@ -3484,7 +3454,6 @@ xfs_inode_flush(
int flags)
{
xfs_mount_t *mp = ip->i_mount;
- xfs_inode_log_item_t *iip = ip->i_itemp;
int error = 0;
if (XFS_FORCED_SHUTDOWN(mp))
@@ -3494,33 +3463,9 @@ xfs_inode_flush(
* Bypass inodes which have already been cleaned by
* the inode flush clustering code inside xfs_iflush
*/
- if ((ip->i_update_core == 0) &&
- ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)))
+ if (xfs_inode_clean(ip))
return 0;
- if (flags & FLUSH_LOG) {
- if (iip && iip->ili_last_lsn) {
- xlog_t *log = mp->m_log;
- xfs_lsn_t sync_lsn;
- int log_flags = XFS_LOG_FORCE;
-
- spin_lock(&log->l_grant_lock);
- sync_lsn = log->l_last_sync_lsn;
- spin_unlock(&log->l_grant_lock);
-
- if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
- if (flags & FLUSH_SYNC)
- log_flags |= XFS_LOG_SYNC;
- error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
- if (error)
- return error;
- }
-
- if (ip->i_update_core == 0)
- return 0;
- }
- }
-
/*
* We make this non-blocking if the inode is contended,
* return EAGAIN to indicate to the caller that they
@@ -3528,30 +3473,22 @@ xfs_inode_flush(
* blocking on inodes inside another operation right
* now, they get caught later by xfs_sync.
*/
- if (flags & FLUSH_INODE) {
- int flush_flags;
-
- if (flags & FLUSH_SYNC) {
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- xfs_iflock(ip);
- } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return EAGAIN;
- }
- } else {
+ if (flags & FLUSH_SYNC) {
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ xfs_iflock(ip);
+ } else if (xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+ if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) {
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
return EAGAIN;
}
-
- if (flags & FLUSH_SYNC)
- flush_flags = XFS_IFLUSH_SYNC;
- else
- flush_flags = XFS_IFLUSH_ASYNC;
-
- error = xfs_iflush(ip, flush_flags);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ } else {
+ return EAGAIN;
}
+ error = xfs_iflush(ip, (flags & FLUSH_SYNC) ? XFS_IFLUSH_SYNC
+ : XFS_IFLUSH_ASYNC_NOBLOCK);
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
return error;
}
@@ -3694,12 +3631,12 @@ xfs_finish_reclaim(
* We get the flush lock regardless, though, just to make sure
* we don't free it while it is being flushed.
*/
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- if (!locked) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
- }
+ if (!locked) {
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_iflock(ip);
+ }
+ if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
if (ip->i_update_core ||
((ip->i_itemp != NULL) &&
(ip->i_itemp->ili_format.ilf_fields != 0))) {
@@ -3719,17 +3656,11 @@ xfs_finish_reclaim(
ASSERT(ip->i_update_core == 0);
ASSERT(ip->i_itemp == NULL ||
ip->i_itemp->ili_format.ilf_fields == 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else if (locked) {
- /*
- * We are not interested in doing an iflush if we're
- * in the process of shutting down the filesystem forcibly.
- * So, just reclaim the inode.
- */
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
+ xfs_ifunlock(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
reclaim:
xfs_ireclaim(ip);
return 0;