summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_iget.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_iget.c')
-rw-r--r--fs/xfs/xfs_iget.c501
1 files changed, 199 insertions, 302 deletions
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e229e9e001c2..bf4dc5eb4cfc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,164 +38,122 @@
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
+#include "xfs_trans_priv.h"
+#include "xfs_inode_item.h"
/*
- * Look up an inode by number in the given file system.
- * The inode is looked up in the cache held in each AG.
- * If the inode is found in the cache, attach it to the provided
- * vnode.
- *
- * If it is not in core, read it in from the file system's device,
- * add it to the cache and attach the provided vnode.
- *
- * The inode is locked according to the value of the lock_flags parameter.
- * This flag parameter indicates how and if the inode's IO lock and inode lock
- * should be taken.
- *
- * mp -- the mount point structure for the current file system. It points
- * to the inode hash table.
- * tp -- a pointer to the current transaction if there is one. This is
- * simply passed through to the xfs_iread() call.
- * ino -- the number of the inode desired. This is the unique identifier
- * within the file system for the inode being requested.
- * lock_flags -- flags indicating how to lock the inode. See the comment
- * for xfs_ilock() for a list of valid values.
- * bno -- the block number starting the buffer containing the inode,
- * if known (as by bulkstat), else 0.
+ * Check the validity of the inode we just found it the cache
*/
-STATIC int
-xfs_iget_core(
- struct inode *inode,
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_ino_t ino,
- uint flags,
- uint lock_flags,
- xfs_inode_t **ipp,
- xfs_daddr_t bno)
+static int
+xfs_iget_cache_hit(
+ struct xfs_perag *pag,
+ struct xfs_inode *ip,
+ int flags,
+ int lock_flags) __releases(pag->pag_ici_lock)
{
- struct inode *old_inode;
- xfs_inode_t *ip;
- xfs_inode_t *iq;
- int error;
- unsigned long first_index, mask;
- xfs_perag_t *pag;
- xfs_agino_t agino;
+ struct xfs_mount *mp = ip->i_mount;
+ int error = EAGAIN;
- /* the radix tree exists only in inode capable AGs */
- if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
- return EINVAL;
-
- /* get the perag structure and ensure that it's inode capable */
- pag = xfs_get_perag(mp, ino);
- if (!pag->pagi_inodeok)
- return EINVAL;
- ASSERT(pag->pag_ici_init);
- agino = XFS_INO_TO_AGINO(mp, ino);
+ /*
+ * If INEW is set this inode is being set up
+ * If IRECLAIM is set this inode is being torn down
+ * Pause and try again.
+ */
+ if (xfs_iflags_test(ip, (XFS_INEW|XFS_IRECLAIM))) {
+ XFS_STATS_INC(xs_ig_frecycle);
+ goto out_error;
+ }
-again:
- read_lock(&pag->pag_ici_lock);
- ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+ /* If IRECLAIMABLE is set, we've torn down the vfs inode part */
+ if (xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
- if (ip != NULL) {
/*
- * If INEW is set this inode is being set up
- * we need to pause and try again.
+ * If lookup is racing with unlink, then we should return an
+ * error immediately so we don't remove it from the reclaim
+ * list and potentially leak the inode.
*/
- if (xfs_iflags_test(ip, XFS_INEW)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
+ if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ goto out_error;
}
- old_inode = ip->i_vnode;
- if (old_inode == NULL) {
- /*
- * If IRECLAIM is set this inode is
- * on its way out of the system,
- * we need to pause and try again.
- */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
- }
- ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-
- /*
- * If lookup is racing with unlink, then we
- * should return an error immediately so we
- * don't remove it from the reclaim list and
- * potentially leak the inode.
- */
- if ((ip->i_d.di_mode == 0) &&
- !(flags & XFS_IGET_CREATE)) {
- read_unlock(&pag->pag_ici_lock);
- xfs_put_perag(mp, pag);
- return ENOENT;
- }
-
- xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
- XFS_STATS_INC(xs_ig_found);
- xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
- read_unlock(&pag->pag_ici_lock);
-
- XFS_MOUNT_ILOCK(mp);
- list_del_init(&ip->i_reclaim);
- XFS_MOUNT_IUNLOCK(mp);
-
- goto finish_inode;
-
- } else if (inode != old_inode) {
- /* The inode is being torn down, pause and
- * try again.
- */
- if (old_inode->i_state & (I_FREEING | I_CLEAR)) {
- read_unlock(&pag->pag_ici_lock);
- delay(1);
- XFS_STATS_INC(xs_ig_frecycle);
-
- goto again;
- }
-/* Chances are the other vnode (the one in the inode) is being torn
-* down right now, and we landed on top of it. Question is, what do
-* we do? Unhook the old inode and hook up the new one?
-*/
- cmn_err(CE_PANIC,
- "xfs_iget_core: ambiguous vns: vp/0x%p, invp/0x%p",
- old_inode, inode);
- }
+ xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
/*
- * Inode cache hit
+ * We need to re-initialise the VFS inode as it has been
+ * 'freed' by the VFS. Do this here so we can deal with
+ * errors cleanly, then tag it so it can be set up correctly
+ * later.
*/
- read_unlock(&pag->pag_ici_lock);
- XFS_STATS_INC(xs_ig_found);
-
-finish_inode:
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
- xfs_put_perag(mp, pag);
- return ENOENT;
+ if (!inode_init_always(mp->m_super, VFS_I(ip))) {
+ error = ENOMEM;
+ goto out_error;
}
- if (lock_flags != 0)
- xfs_ilock(ip, lock_flags);
+ /*
+ * We must set the XFS_INEW flag before clearing the
+ * XFS_IRECLAIMABLE flag so that if a racing lookup does
+ * not find the XFS_IRECLAIMABLE above but has the igrab()
+ * below succeed we can safely check XFS_INEW to detect
+ * that this inode is still being initialised.
+ */
+ xfs_iflags_set(ip, XFS_INEW);
+ xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
+
+ /* clear the radix tree reclaim flag as well. */
+ __xfs_inode_clear_reclaim_tag(mp, pag, ip);
+ } else if (!igrab(VFS_I(ip))) {
+ /* If the VFS inode is being torn down, pause and try again. */
+ XFS_STATS_INC(xs_ig_frecycle);
+ goto out_error;
+ } else if (xfs_iflags_test(ip, XFS_INEW)) {
+ /*
+ * We are racing with another cache hit that is
+ * currently recycling this inode out of the XFS_IRECLAIMABLE
+ * state. Wait for the initialisation to complete before
+ * continuing.
+ */
+ wait_on_inode(VFS_I(ip));
+ }
- xfs_iflags_clear(ip, XFS_ISTALE);
- xfs_itrace_exit_tag(ip, "xfs_iget.found");
- goto return_ip;
+ if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ iput(VFS_I(ip));
+ goto out_error;
}
- /*
- * Inode cache miss
- */
+ /* We've got a live one. */
+ read_unlock(&pag->pag_ici_lock);
+
+ if (lock_flags != 0)
+ xfs_ilock(ip, lock_flags);
+
+ xfs_iflags_clear(ip, XFS_ISTALE);
+ xfs_itrace_exit_tag(ip, "xfs_iget.found");
+ XFS_STATS_INC(xs_ig_found);
+ return 0;
+
+out_error:
read_unlock(&pag->pag_ici_lock);
- XFS_STATS_INC(xs_ig_missed);
+ return error;
+}
+
+
+static int
+xfs_iget_cache_miss(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_trans_t *tp,
+ xfs_ino_t ino,
+ struct xfs_inode **ipp,
+ xfs_daddr_t bno,
+ int flags,
+ int lock_flags) __releases(pag->pag_ici_lock)
+{
+ struct xfs_inode *ip;
+ int error;
+ unsigned long first_index, mask;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
/*
* Read the disk inode attributes into a new inode structure and get
@@ -203,116 +161,85 @@ finish_inode:
*/
error = xfs_iread(mp, tp, ino, &ip, bno,
(flags & XFS_IGET_BULKSTAT) ? XFS_IMAP_BULKSTAT : 0);
- if (error) {
- xfs_put_perag(mp, pag);
+ if (error)
return error;
- }
xfs_itrace_exit_tag(ip, "xfs_iget.alloc");
-
- mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- init_waitqueue_head(&ip->i_ipin_wait);
- atomic_set(&ip->i_pincount, 0);
-
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&ip->i_flush);
- complete(&ip->i_flush);
+ if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+ error = ENOENT;
+ goto out_destroy;
+ }
if (lock_flags)
xfs_ilock(ip, lock_flags);
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
- xfs_idestroy(ip);
- xfs_put_perag(mp, pag);
- return ENOENT;
- }
-
/*
* Preload the radix tree so we can insert safely under the
- * write spinlock.
+ * write spinlock. Note that we cannot sleep inside the preload
+ * region.
*/
if (radix_tree_preload(GFP_KERNEL)) {
- xfs_idestroy(ip);
- delay(1);
- goto again;
+ error = EAGAIN;
+ goto out_unlock;
}
+
mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1);
first_index = agino & mask;
write_lock(&pag->pag_ici_lock);
- /*
- * insert the new inode
- */
+
+ /* insert the new inode */
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
if (unlikely(error)) {
- BUG_ON(error != -EEXIST);
- write_unlock(&pag->pag_ici_lock);
- radix_tree_preload_end();
- xfs_idestroy(ip);
+ WARN_ON(error != -EEXIST);
XFS_STATS_INC(xs_ig_dup);
- goto again;
+ error = EAGAIN;
+ goto out_preload_end;
}
- /*
- * These values _must_ be set before releasing the radix tree lock!
- */
+ /* These values _must_ be set before releasing the radix tree lock! */
ip->i_udquot = ip->i_gdquot = NULL;
xfs_iflags_set(ip, XFS_INEW);
write_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
-
- /*
- * Link ip to its mount and thread it on the mount's inode list.
- */
- XFS_MOUNT_ILOCK(mp);
- if ((iq = mp->m_inodes)) {
- ASSERT(iq->i_mprev->i_mnext == iq);
- ip->i_mprev = iq->i_mprev;
- iq->i_mprev->i_mnext = ip;
- iq->i_mprev = ip;
- ip->i_mnext = iq;
- } else {
- ip->i_mnext = ip;
- ip->i_mprev = ip;
- }
- mp->m_inodes = ip;
-
- XFS_MOUNT_IUNLOCK(mp);
- xfs_put_perag(mp, pag);
-
- return_ip:
- ASSERT(ip->i_df.if_ext_max ==
- XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
-
- xfs_iflags_set(ip, XFS_IMODIFIED);
*ipp = ip;
-
- /*
- * Set up the Linux with the Linux inode.
- */
- ip->i_vnode = inode;
- inode->i_private = ip;
-
- /*
- * If we have a real type for an on-disk inode, we can set ops(&unlock)
- * now. If it's a new inode being created, xfs_ialloc will handle it.
- */
- if (ip->i_d.di_mode != 0)
- xfs_setup_inode(ip);
return 0;
-}
+out_preload_end:
+ write_unlock(&pag->pag_ici_lock);
+ radix_tree_preload_end();
+out_unlock:
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
+out_destroy:
+ xfs_destroy_inode(ip);
+ return error;
+}
/*
- * The 'normal' internal xfs_iget, if needed it will
- * 'allocate', or 'get', the vnode.
+ * Look up an inode by number in the given file system.
+ * The inode is looked up in the cache held in each AG.
+ * If the inode is found in the cache, initialise the vfs inode
+ * if necessary.
+ *
+ * If it is not in core, read it in from the file system's device,
+ * add it to the cache and initialise the vfs inode.
+ *
+ * The inode is locked according to the value of the lock_flags parameter.
+ * This flag parameter indicates how and if the inode's IO lock and inode lock
+ * should be taken.
+ *
+ * mp -- the mount point structure for the current file system. It points
+ * to the inode hash table.
+ * tp -- a pointer to the current transaction if there is one. This is
+ * simply passed through to the xfs_iread() call.
+ * ino -- the number of the inode desired. This is the unique identifier
+ * within the file system for the inode being requested.
+ * lock_flags -- flags indicating how to lock the inode. See the comment
+ * for xfs_ilock() for a list of valid values.
+ * bno -- the block number starting the buffer containing the inode,
+ * if known (as by bulkstat), else 0.
*/
int
xfs_iget(
@@ -324,61 +251,65 @@ xfs_iget(
xfs_inode_t **ipp,
xfs_daddr_t bno)
{
- struct inode *inode;
xfs_inode_t *ip;
int error;
+ xfs_perag_t *pag;
+ xfs_agino_t agino;
- XFS_STATS_INC(xs_ig_attempts);
-
-retry:
- inode = iget_locked(mp->m_super, ino);
- if (!inode)
- /* If we got no inode we are out of memory */
- return ENOMEM;
-
- if (inode->i_state & I_NEW) {
- XFS_STATS_INC(vn_active);
- XFS_STATS_INC(vn_alloc);
-
- error = xfs_iget_core(inode, mp, tp, ino, flags,
- lock_flags, ipp, bno);
- if (error) {
- make_bad_inode(inode);
- if (inode->i_state & I_NEW)
- unlock_new_inode(inode);
- iput(inode);
- }
- return error;
+ /* the radix tree exists only in inode capable AGs */
+ if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+ return EINVAL;
+
+ /* get the perag structure and ensure that it's inode capable */
+ pag = xfs_get_perag(mp, ino);
+ if (!pag->pagi_inodeok)
+ return EINVAL;
+ ASSERT(pag->pag_ici_init);
+ agino = XFS_INO_TO_AGINO(mp, ino);
+
+again:
+ error = 0;
+ read_lock(&pag->pag_ici_lock);
+ ip = radix_tree_lookup(&pag->pag_ici_root, agino);
+
+ if (ip) {
+ error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
+ if (error)
+ goto out_error_or_again;
+ } else {
+ read_unlock(&pag->pag_ici_lock);
+ XFS_STATS_INC(xs_ig_missed);
+
+ error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, bno,
+ flags, lock_flags);
+ if (error)
+ goto out_error_or_again;
}
+ xfs_put_perag(mp, pag);
+
+ xfs_iflags_set(ip, XFS_IMODIFIED);
+ *ipp = ip;
+ ASSERT(ip->i_df.if_ext_max ==
+ XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
/*
- * If the inode is not fully constructed due to
- * filehandle mismatches wait for the inode to go
- * away and try again.
- *
- * iget_locked will call __wait_on_freeing_inode
- * to wait for the inode to go away.
+ * If we have a real type for an on-disk inode, we can set ops(&unlock)
+ * now. If it's a new inode being created, xfs_ialloc will handle it.
*/
- if (is_bad_inode(inode)) {
- iput(inode);
- delay(1);
- goto retry;
- }
+ if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+ xfs_setup_inode(ip);
+ return 0;
- ip = XFS_I(inode);
- if (!ip) {
- iput(inode);
+out_error_or_again:
+ if (error == EAGAIN) {
delay(1);
- goto retry;
+ goto again;
}
-
- if (lock_flags != 0)
- xfs_ilock(ip, lock_flags);
- XFS_STATS_INC(xs_ig_found);
- *ipp = ip;
- return 0;
+ xfs_put_perag(mp, pag);
+ return error;
}
+
/*
* Look for the inode corresponding to the given ino in the hash table.
* If it is there and its i_transp pointer matches tp, return it.
@@ -462,14 +393,13 @@ xfs_ireclaim(xfs_inode_t *ip)
xfs_iextract(ip);
/*
- * Here we do a spurious inode lock in order to coordinate with
- * xfs_sync(). This is because xfs_sync() references the inodes
- * in the mount list without taking references on the corresponding
- * vnodes. We make that OK here by ensuring that we wait until
- * the inode is unlocked in xfs_sync() before we go ahead and
- * free it. We get both the regular lock and the io lock because
- * the xfs_sync() code may need to drop the regular one but will
- * still hold the io lock.
+ * Here we do a spurious inode lock in order to coordinate with inode
+ * cache radix tree lookups. This is because the lookup can reference
+ * the inodes in the cache without taking references. We make that OK
+ * here by ensuring that we wait until the inode is unlocked after the
+ * lookup before we go ahead and free it. We get both the ilock and
+ * the iolock because the code may need to drop the ilock one but will
+ * still hold the iolock.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -480,14 +410,6 @@ xfs_ireclaim(xfs_inode_t *ip)
XFS_QM_DQDETACH(ip->i_mount, ip);
/*
- * Pull our behavior descriptor from the vnode chain.
- */
- if (ip->i_vnode) {
- ip->i_vnode->i_private = NULL;
- ip->i_vnode = NULL;
- }
-
- /*
* Free all memory associated with the inode.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
@@ -505,38 +427,13 @@ xfs_iextract(
{
xfs_mount_t *mp = ip->i_mount;
xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino);
- xfs_inode_t *iq;
write_lock(&pag->pag_ici_lock);
radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
write_unlock(&pag->pag_ici_lock);
xfs_put_perag(mp, pag);
- /*
- * Remove from mount's inode list.
- */
- XFS_MOUNT_ILOCK(mp);
- ASSERT((ip->i_mnext != NULL) && (ip->i_mprev != NULL));
- iq = ip->i_mnext;
- iq->i_mprev = ip->i_mprev;
- ip->i_mprev->i_mnext = iq;
-
- /*
- * Fix up the head pointer if it points to the inode being deleted.
- */
- if (mp->m_inodes == ip) {
- if (ip == iq) {
- mp->m_inodes = NULL;
- } else {
- mp->m_inodes = iq;
- }
- }
-
- /* Deal with the deleted inodes list */
- list_del_init(&ip->i_reclaim);
-
mp->m_ireclaims++;
- XFS_MOUNT_IUNLOCK(mp);
}
/*
@@ -737,7 +634,7 @@ xfs_iunlock(
* it is in the AIL and anyone is waiting on it. Don't do
* this if the caller has asked us not to.
*/
- xfs_trans_unlocked_item(ip->i_mount,
+ xfs_trans_unlocked_item(ip->i_itemp->ili_item.li_ailp,
(xfs_log_item_t*)(ip->i_itemp));
}
xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);