summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r--fs/xfs/xfs_inode.c337
1 files changed, 200 insertions, 137 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 96f606deee31..4e560e6a12c1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -25,6 +25,7 @@
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
+#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
@@ -48,6 +49,7 @@
#include "xfs_trans_priv.h"
#include "xfs_log.h"
#include "xfs_bmap_btree.h"
+#include "xfs_reflink.h"
kmem_zone_t *xfs_inode_zone;
@@ -76,6 +78,29 @@ xfs_get_extsz_hint(
}
/*
+ * Helper function to extract CoW extent size hint from inode.
+ * Between the extent size hint and the CoW extent size hint, we
+ * return the greater of the two. If the value is zero (automatic),
+ * use the default size.
+ */
+xfs_extlen_t
+xfs_get_cowextsz_hint(
+ struct xfs_inode *ip)
+{
+ xfs_extlen_t a, b;
+
+ a = 0;
+ if (ip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
+ a = ip->i_d.di_cowextsize;
+ b = xfs_get_extsz_hint(ip);
+
+ a = max(a, b);
+ if (a == 0)
+ return XFS_DEFAULT_COWEXTSZ_HINT;
+ return a;
+}
+
+/*
* These two are wrapper routines around the xfs_ilock() routine used to
* centralize some grungy code. They are used in places that wish to lock the
* inode solely for reading the extents. The reason these places can't just
@@ -431,7 +456,7 @@ xfs_lock_inumorder(int lock_mode, int subclass)
* lock more than one at a time, lockdep will report false positives saying we
* have violated locking orders.
*/
-void
+static void
xfs_lock_inodes(
xfs_inode_t **ips,
int inodes,
@@ -650,6 +675,8 @@ _xfs_dic2xflags(
if (di_flags2 & XFS_DIFLAG2_ANY) {
if (di_flags2 & XFS_DIFLAG2_DAX)
flags |= FS_XFLAG_DAX;
+ if (di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
+ flags |= FS_XFLAG_COWEXTSIZE;
}
if (has_attr)
@@ -667,14 +694,6 @@ xfs_ip2xflags(
return _xfs_dic2xflags(dic->di_flags, dic->di_flags2, XFS_IFORK_Q(ip));
}
-uint
-xfs_dic2xflags(
- struct xfs_dinode *dip)
-{
- return _xfs_dic2xflags(be16_to_cpu(dip->di_flags),
- be64_to_cpu(dip->di_flags2), XFS_DFORK_Q(dip));
-}
-
/*
* Lookups up an inode from "name". If ci_name is not NULL, then a CI match
* is allowed, otherwise it has to be an exact match. If a CI match is found,
@@ -748,7 +767,7 @@ out_unlock:
* are not linked into the directory structure - they are attached
* directly to the superblock - and so have no parent.
*/
-int
+static int
xfs_ialloc(
xfs_trans_t *tp,
xfs_inode_t *pip,
@@ -828,7 +847,7 @@ xfs_ialloc(
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
- tv = current_fs_time(mp->m_super);
+ tv = current_time(inode);
inode->i_mtime = tv;
inode->i_atime = tv;
inode->i_ctime = tv;
@@ -841,6 +860,7 @@ xfs_ialloc(
if (ip->i_d.di_version == 3) {
inode->i_version = 1;
ip->i_d.di_flags2 = 0;
+ ip->i_d.di_cowextsize = 0;
ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
}
@@ -903,6 +923,15 @@ xfs_ialloc(
ip->i_d.di_flags |= di_flags;
ip->i_d.di_flags2 |= di_flags2;
}
+ if (pip &&
+ (pip->i_d.di_flags2 & XFS_DIFLAG2_ANY) &&
+ pip->i_d.di_version == 3 &&
+ ip->i_d.di_version == 3) {
+ if (pip->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) {
+ ip->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
+ ip->i_d.di_cowextsize = pip->i_d.di_cowextsize;
+ }
+ }
/* FALLTHROUGH */
case S_IFLNK:
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
@@ -1030,7 +1059,7 @@ xfs_dir_ialloc(
tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
}
- code = xfs_trans_roll(&tp, 0);
+ code = xfs_trans_roll(&tp, NULL);
if (committed != NULL)
*committed = 1;
@@ -1085,7 +1114,7 @@ xfs_dir_ialloc(
* link count to go to zero, move the inode to AGI unlinked list so that it can
* be freed when the last active reference goes away via xfs_inactive().
*/
-int /* error */
+static int /* error */
xfs_droplink(
xfs_trans_t *tp,
xfs_inode_t *ip)
@@ -1104,7 +1133,7 @@ xfs_droplink(
/*
* Increment the link count on an inode & log the change.
*/
-int
+static int
xfs_bumplink(
xfs_trans_t *tp,
xfs_inode_t *ip)
@@ -1130,7 +1159,7 @@ xfs_create(
struct xfs_inode *ip = NULL;
struct xfs_trans *tp = NULL;
int error;
- xfs_bmap_free_t free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
bool unlock_dp_on_error = false;
prid_t prid;
@@ -1161,11 +1190,9 @@ xfs_create(
rdev = 0;
resblks = XFS_MKDIR_SPACE_RES(mp, name->len);
tres = &M_RES(mp)->tr_mkdir;
- tp = xfs_trans_alloc(mp, XFS_TRANS_MKDIR);
} else {
resblks = XFS_CREATE_SPACE_RES(mp, name->len);
tres = &M_RES(mp)->tr_create;
- tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE);
}
/*
@@ -1174,26 +1201,25 @@ xfs_create(
* the case we'll drop the one we have and get a more
* appropriate transaction later.
*/
- error = xfs_trans_reserve(tp, tres, resblks, 0);
+ error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
/* flush outstanding delalloc blocks and retry */
xfs_flush_inodes(mp);
- error = xfs_trans_reserve(tp, tres, resblks, 0);
+ error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
}
if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
- error = xfs_trans_reserve(tp, tres, 0, 0);
+ error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
}
if (error)
- goto out_trans_cancel;
-
+ goto out_release_inode;
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL |
XFS_IOLOCK_PARENT | XFS_ILOCK_PARENT);
unlock_dp_on_error = true;
- xfs_bmap_init(&free_list, &first_block);
+ xfs_defer_init(&dfops, &first_block);
/*
* Reserve disk quota and the inode.
@@ -1230,7 +1256,7 @@ xfs_create(
unlock_dp_on_error = false;
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
- &first_block, &free_list, resblks ?
+ &first_block, &dfops, resblks ?
resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
if (error) {
ASSERT(error != -ENOSPC);
@@ -1264,7 +1290,7 @@ xfs_create(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
- error = xfs_bmap_finish(&tp, &free_list, NULL);
+ error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
goto out_bmap_cancel;
@@ -1280,7 +1306,7 @@ xfs_create(
return 0;
out_bmap_cancel:
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
out_trans_cancel:
xfs_trans_cancel(tp);
out_release_inode:
@@ -1337,17 +1363,16 @@ xfs_create_tmpfile(
return error;
resblks = XFS_IALLOC_SPACE_RES(mp);
- tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
-
tres = &M_RES(mp)->tr_create_tmpfile;
- error = xfs_trans_reserve(tp, tres, resblks, 0);
+
+ error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
/* No space at all so try a "no-allocation" reservation */
resblks = 0;
- error = xfs_trans_reserve(tp, tres, 0, 0);
+ error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
}
if (error)
- goto out_trans_cancel;
+ goto out_release_inode;
error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
pdqp, resblks, 1, 0);
@@ -1413,7 +1438,7 @@ xfs_link(
xfs_mount_t *mp = tdp->i_mount;
xfs_trans_t *tp;
int error;
- xfs_bmap_free_t free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
int resblks;
@@ -1432,15 +1457,14 @@ xfs_link(
if (error)
goto std_return;
- tp = xfs_trans_alloc(mp, XFS_TRANS_LINK);
resblks = XFS_LINK_SPACE_RES(mp, target_name->len);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, resblks, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
resblks = 0;
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_link, 0, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp);
}
if (error)
- goto error_return;
+ goto std_return;
xfs_ilock(tdp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
@@ -1465,7 +1489,7 @@ xfs_link(
goto error_return;
}
- xfs_bmap_init(&free_list, &first_block);
+ xfs_defer_init(&dfops, &first_block);
/*
* Handle initial link state of O_TMPFILE inode
@@ -1477,7 +1501,7 @@ xfs_link(
}
error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
- &first_block, &free_list, resblks);
+ &first_block, &dfops, resblks);
if (error)
goto error_return;
xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -1495,9 +1519,9 @@ xfs_link(
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, &free_list, NULL);
+ error = xfs_defer_finish(&tp, &dfops, NULL);
if (error) {
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
goto error_return;
}
@@ -1539,7 +1563,7 @@ xfs_itruncate_extents(
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
- xfs_bmap_free_t free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
xfs_fileoff_t first_unmap_block;
xfs_fileoff_t last_block;
@@ -1575,12 +1599,12 @@ xfs_itruncate_extents(
ASSERT(first_unmap_block < last_block);
unmap_len = last_block - first_unmap_block + 1;
while (!done) {
- xfs_bmap_init(&free_list, &first_block);
+ xfs_defer_init(&dfops, &first_block);
error = xfs_bunmapi(tp, ip,
first_unmap_block, unmap_len,
xfs_bmapi_aflag(whichfork),
XFS_ITRUNC_MAX_EXTENTS,
- &first_block, &free_list,
+ &first_block, &dfops,
&done);
if (error)
goto out_bmap_cancel;
@@ -1589,7 +1613,7 @@ xfs_itruncate_extents(
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(&tp, &free_list, ip);
+ error = xfs_defer_finish(&tp, &dfops, ip);
if (error)
goto out_bmap_cancel;
@@ -1598,6 +1622,20 @@ xfs_itruncate_extents(
goto out;
}
+ /* Remove all pending CoW reservations. */
+ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block,
+ last_block);
+ if (error)
+ goto out;
+
+ /*
+ * Clear the reflink flag if we truncated everything.
+ */
+ if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) {
+ ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
+ xfs_inode_clear_cowblocks_tag(ip);
+ }
+
/*
* Always re-log the inode so that our permanent transaction can keep
* on rolling it forward in the log.
@@ -1615,7 +1653,7 @@ out_bmap_cancel:
* the transaction can be properly aborted. We just need to make sure
* we're not holding any resources that we were not when we came in.
*/
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
goto out;
}
@@ -1710,11 +1748,9 @@ xfs_inactive_truncate(
struct xfs_trans *tp;
int error;
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp);
return error;
}
@@ -1724,7 +1760,7 @@ xfs_inactive_truncate(
/*
* Log the inode size first to prevent stale data exposure in the event
* of a system crash before the truncate completes. See the related
- * comment in xfs_setattr_size() for details.
+ * comment in xfs_vn_setattr_size() for details.
*/
ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1758,14 +1794,12 @@ STATIC int
xfs_inactive_ifree(
struct xfs_inode *ip)
{
- xfs_bmap_free_t free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int error;
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
-
/*
* The ifree transaction might need to allocate blocks for record
* insertion to the finobt. We don't want to fail here at ENOSPC, so
@@ -1781,9 +1815,8 @@ xfs_inactive_ifree(
* now remains allocated and sits on the unlinked list until the fs is
* repaired.
*/
- tp->t_flags |= XFS_TRANS_RESERVE;
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree,
- XFS_IFREE_SPACE_RES(mp), 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ifree,
+ XFS_IFREE_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
if (error) {
if (error == -ENOSPC) {
xfs_warn_ratelimited(mp,
@@ -1792,15 +1825,14 @@ xfs_inactive_ifree(
} else {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
}
- xfs_trans_cancel(tp);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- xfs_bmap_init(&free_list, &first_block);
- error = xfs_ifree(tp, ip, &free_list);
+ xfs_defer_init(&dfops, &first_block);
+ error = xfs_ifree(tp, ip, &dfops);
if (error) {
/*
* If we fail to free the inode, shut down. The cancel
@@ -1826,11 +1858,11 @@ xfs_inactive_ifree(
* Just ignore errors at this point. There is nothing we can do except
* to try to keep going. Make sure it's not a silent error.
*/
- error = xfs_bmap_finish(&tp, &free_list, NULL);
+ error = xfs_defer_finish(&tp, &dfops, NULL);
if (error) {
- xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
+ xfs_notice(mp, "%s: xfs_defer_finish returned error %d",
__func__, error);
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
}
error = xfs_trans_commit(tp);
if (error)
@@ -1868,6 +1900,7 @@ xfs_inactive(
}
mp = ip->i_mount;
+ ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY));
/* If this is a read-only mount, don't do this (would generate I/O) */
if (mp->m_flags & XFS_MOUNT_RDONLY)
@@ -2386,7 +2419,7 @@ int
xfs_ifree(
xfs_trans_t *tp,
xfs_inode_t *ip,
- xfs_bmap_free_t *flist)
+ struct xfs_defer_ops *dfops)
{
int error;
struct xfs_icluster xic = { 0 };
@@ -2405,7 +2438,7 @@ xfs_ifree(
if (error)
return error;
- error = xfs_difree(tp, ip->i_ino, flist, &xic);
+ error = xfs_difree(tp, ip->i_ino, dfops, &xic);
if (error)
return error;
@@ -2493,7 +2526,7 @@ xfs_iunpin_wait(
* directory entry.
*
* This is still safe from a transactional point of view - it is not until we
- * get to xfs_bmap_finish() that we have the possibility of multiple
+ * get to xfs_defer_finish() that we have the possibility of multiple
* transactions in this operation. Hence as long as we remove the directory
* entry and drop the link count in the first transaction of the remove
* operation, there are no transactional constraints on the ordering here.
@@ -2508,7 +2541,7 @@ xfs_remove(
xfs_trans_t *tp = NULL;
int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int error = 0;
- xfs_bmap_free_t free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
uint resblks;
@@ -2525,11 +2558,6 @@ xfs_remove(
if (error)
goto std_return;
- if (is_dir)
- tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
- else
- tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
-
/*
* We try to get the real space reservation first,
* allowing for directory btree deletion(s) implying
@@ -2540,14 +2568,15 @@ xfs_remove(
* block from the directory.
*/
resblks = XFS_REMOVE_SPACE_RES(mp);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, resblks, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp);
if (error == -ENOSPC) {
resblks = 0;
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_remove, 0, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0,
+ &tp);
}
if (error) {
ASSERT(error != -ENOSPC);
- goto out_trans_cancel;
+ goto std_return;
}
xfs_ilock(dp, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
@@ -2594,9 +2623,9 @@ xfs_remove(
if (error)
goto out_trans_cancel;
- xfs_bmap_init(&free_list, &first_block);
+ xfs_defer_init(&dfops, &first_block);
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
- &first_block, &free_list, resblks);
+ &first_block, &dfops, resblks);
if (error) {
ASSERT(error != -ENOENT);
goto out_bmap_cancel;
@@ -2610,7 +2639,7 @@ xfs_remove(
if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, &free_list, NULL);
+ error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
goto out_bmap_cancel;
@@ -2624,7 +2653,7 @@ xfs_remove(
return 0;
out_bmap_cancel:
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
out_trans_cancel:
xfs_trans_cancel(tp);
std_return:
@@ -2685,7 +2714,7 @@ xfs_sort_for_rename(
static int
xfs_finish_rename(
struct xfs_trans *tp,
- struct xfs_bmap_free *free_list)
+ struct xfs_defer_ops *dfops)
{
int error;
@@ -2696,9 +2725,9 @@ xfs_finish_rename(
if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
xfs_trans_set_sync(tp);
- error = xfs_bmap_finish(&tp, free_list, NULL);
+ error = xfs_defer_finish(&tp, dfops, NULL);
if (error) {
- xfs_bmap_cancel(free_list);
+ xfs_defer_cancel(dfops);
xfs_trans_cancel(tp);
return error;
}
@@ -2720,7 +2749,7 @@ xfs_cross_rename(
struct xfs_inode *dp2,
struct xfs_name *name2,
struct xfs_inode *ip2,
- struct xfs_bmap_free *free_list,
+ struct xfs_defer_ops *dfops,
xfs_fsblock_t *first_block,
int spaceres)
{
@@ -2732,14 +2761,14 @@ xfs_cross_rename(
/* Swap inode number for dirent in first parent */
error = xfs_dir_replace(tp, dp1, name1,
ip2->i_ino,
- first_block, free_list, spaceres);
+ first_block, dfops, spaceres);
if (error)
goto out_trans_abort;
/* Swap inode number for dirent in second parent */
error = xfs_dir_replace(tp, dp2, name2,
ip1->i_ino,
- first_block, free_list, spaceres);
+ first_block, dfops, spaceres);
if (error)
goto out_trans_abort;
@@ -2754,7 +2783,7 @@ xfs_cross_rename(
if (S_ISDIR(VFS_I(ip2)->i_mode)) {
error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
dp1->i_ino, first_block,
- free_list, spaceres);
+ dfops, spaceres);
if (error)
goto out_trans_abort;
@@ -2781,7 +2810,7 @@ xfs_cross_rename(
if (S_ISDIR(VFS_I(ip1)->i_mode)) {
error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
dp2->i_ino, first_block,
- free_list, spaceres);
+ dfops, spaceres);
if (error)
goto out_trans_abort;
@@ -2820,10 +2849,10 @@ xfs_cross_rename(
}
xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
- return xfs_finish_rename(tp, free_list);
+ return xfs_finish_rename(tp, dfops);
out_trans_abort:
- xfs_bmap_cancel(free_list);
+ xfs_defer_cancel(dfops);
xfs_trans_cancel(tp);
return error;
}
@@ -2855,6 +2884,7 @@ xfs_rename_alloc_whiteout(
* and flag it as linkable.
*/
drop_nlink(VFS_I(tmpfile));
+ xfs_setup_iops(tmpfile);
xfs_finish_inode_setup(tmpfile);
VFS_I(tmpfile)->i_state |= I_LINKABLE;
@@ -2877,7 +2907,7 @@ xfs_rename(
{
struct xfs_mount *mp = src_dp->i_mount;
struct xfs_trans *tp;
- struct xfs_bmap_free free_list;
+ struct xfs_defer_ops dfops;
xfs_fsblock_t first_block;
struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
@@ -2910,15 +2940,15 @@ xfs_rename(
xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
inodes, &num_inodes);
- tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp);
if (error == -ENOSPC) {
spaceres = 0;
- error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0,
+ &tp);
}
if (error)
- goto out_trans_cancel;
+ goto out_release_wip;
/*
* Attach the dquots to the inodes
@@ -2966,13 +2996,13 @@ xfs_rename(
goto out_trans_cancel;
}
- xfs_bmap_init(&free_list, &first_block);
+ xfs_defer_init(&dfops, &first_block);
/* RENAME_EXCHANGE is unique from here on. */
if (flags & RENAME_EXCHANGE)
return xfs_cross_rename(tp, src_dp, src_name, src_ip,
target_dp, target_name, target_ip,
- &free_list, &first_block, spaceres);
+ &dfops, &first_block, spaceres);
/*
* Set up the target.
@@ -2994,7 +3024,7 @@ xfs_rename(
*/
error = xfs_dir_createname(tp, target_dp, target_name,
src_ip->i_ino, &first_block,
- &free_list, spaceres);
+ &dfops, spaceres);
if (error)
goto out_bmap_cancel;
@@ -3034,7 +3064,7 @@ xfs_rename(
*/
error = xfs_dir_replace(tp, target_dp, target_name,
src_ip->i_ino,
- &first_block, &free_list, spaceres);
+ &first_block, &dfops, spaceres);
if (error)
goto out_bmap_cancel;
@@ -3069,7 +3099,7 @@ xfs_rename(
*/
error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot,
target_dp->i_ino,
- &first_block, &free_list, spaceres);
+ &first_block, &dfops, spaceres);
ASSERT(error != -EEXIST);
if (error)
goto out_bmap_cancel;
@@ -3108,10 +3138,10 @@ xfs_rename(
*/
if (wip) {
error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
- &first_block, &free_list, spaceres);
+ &first_block, &dfops, spaceres);
} else
error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
- &first_block, &free_list, spaceres);
+ &first_block, &dfops, spaceres);
if (error)
goto out_bmap_cancel;
@@ -3146,15 +3176,16 @@ xfs_rename(
if (new_parent)
xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
- error = xfs_finish_rename(tp, &free_list);
+ error = xfs_finish_rename(tp, &dfops);
if (wip)
IRELE(wip);
return error;
out_bmap_cancel:
- xfs_bmap_cancel(&free_list);
+ xfs_defer_cancel(&dfops);
out_trans_cancel:
xfs_trans_cancel(tp);
+out_release_wip:
if (wip)
IRELE(wip);
return error;
@@ -3162,16 +3193,16 @@ out_trans_cancel:
STATIC int
xfs_iflush_cluster(
- xfs_inode_t *ip,
- xfs_buf_t *bp)
+ struct xfs_inode *ip,
+ struct xfs_buf *bp)
{
- xfs_mount_t *mp = ip->i_mount;
+ struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
unsigned long first_index, mask;
unsigned long inodes_per_cluster;
- int ilist_size;
- xfs_inode_t **ilist;
- xfs_inode_t *iq;
+ int cilist_size;
+ struct xfs_inode **cilist;
+ struct xfs_inode *cip;
int nr_found;
int clcount = 0;
int bufwasdelwri;
@@ -3180,23 +3211,23 @@ xfs_iflush_cluster(
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
- ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
- ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
- if (!ilist)
+ cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+ cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
+ if (!cilist)
goto out_put;
mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
rcu_read_lock();
/* really need a gang lookup range call here */
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
first_index, inodes_per_cluster);
if (nr_found == 0)
goto out_free;
for (i = 0; i < nr_found; i++) {
- iq = ilist[i];
- if (iq == ip)
+ cip = cilist[i];
+ if (cip == ip)
continue;
/*
@@ -3205,20 +3236,30 @@ xfs_iflush_cluster(
* We need to check under the i_flags_lock for a valid inode
* here. Skip it if it is not valid or the wrong inode.
*/
- spin_lock(&ip->i_flags_lock);
- if (!ip->i_ino ||
- (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
- spin_unlock(&ip->i_flags_lock);
+ spin_lock(&cip->i_flags_lock);
+ if (!cip->i_ino ||
+ __xfs_iflags_test(cip, XFS_ISTALE)) {
+ spin_unlock(&cip->i_flags_lock);
continue;
}
- spin_unlock(&ip->i_flags_lock);
+
+ /*
+ * Once we fall off the end of the cluster, no point checking
+ * any more inodes in the list because they will also all be
+ * outside the cluster.
+ */
+ if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
+ spin_unlock(&cip->i_flags_lock);
+ break;
+ }
+ spin_unlock(&cip->i_flags_lock);
/*
* Do an un-protected check to see if the inode is dirty and
* is a candidate for flushing. These checks will be repeated
* later after the appropriate locks are acquired.
*/
- if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+ if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
continue;
/*
@@ -3226,15 +3267,28 @@ xfs_iflush_cluster(
* then this inode cannot be flushed and is skipped.
*/
- if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+ if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
continue;
- if (!xfs_iflock_nowait(iq)) {
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ if (!xfs_iflock_nowait(cip)) {
+ xfs_iunlock(cip, XFS_ILOCK_SHARED);
continue;
}
- if (xfs_ipincount(iq)) {
- xfs_ifunlock(iq);
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ if (xfs_ipincount(cip)) {
+ xfs_ifunlock(cip);
+ xfs_iunlock(cip, XFS_ILOCK_SHARED);
+ continue;
+ }
+
+
+ /*
+ * Check the inode number again, just to be certain we are not
+ * racing with freeing in xfs_reclaim_inode(). See the comments
+ * in that function for more information as to why the initial
+ * check is not sufficient.
+ */
+ if (!cip->i_ino) {
+ xfs_ifunlock(cip);
+ xfs_iunlock(cip, XFS_ILOCK_SHARED);
continue;
}
@@ -3242,18 +3296,18 @@ xfs_iflush_cluster(
* arriving here means that this inode can be flushed. First
* re-check that it's dirty before flushing.
*/
- if (!xfs_inode_clean(iq)) {
+ if (!xfs_inode_clean(cip)) {
int error;
- error = xfs_iflush_int(iq, bp);
+ error = xfs_iflush_int(cip, bp);
if (error) {
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ xfs_iunlock(cip, XFS_ILOCK_SHARED);
goto cluster_corrupt_out;
}
clcount++;
} else {
- xfs_ifunlock(iq);
+ xfs_ifunlock(cip);
}
- xfs_iunlock(iq, XFS_ILOCK_SHARED);
+ xfs_iunlock(cip, XFS_ILOCK_SHARED);
}
if (clcount) {
@@ -3263,7 +3317,7 @@ xfs_iflush_cluster(
out_free:
rcu_read_unlock();
- kmem_free(ilist);
+ kmem_free(cilist);
out_put:
xfs_perag_put(pag);
return 0;
@@ -3306,8 +3360,8 @@ cluster_corrupt_out:
/*
* Unlocks the flush lock
*/
- xfs_iflush_abort(iq, false);
- kmem_free(ilist);
+ xfs_iflush_abort(cip, false);
+ kmem_free(cilist);
xfs_perag_put(pag);
return -EFSCORRUPTED;
}
@@ -3327,7 +3381,7 @@ xfs_iflush(
struct xfs_buf **bpp)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_buf *bp;
+ struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
int error;
@@ -3369,14 +3423,22 @@ xfs_iflush(
}
/*
- * Get the buffer containing the on-disk inode.
+ * Get the buffer containing the on-disk inode. We are doing a try-lock
+ * operation here, so we may get an EAGAIN error. In that case, we
+ * simply want to return with the inode still dirty.
+ *
+ * If we get any other error, we effectively have a corruption situation
+ * and we cannot flush the inode, so we treat it the same as failing
+ * xfs_iflush_int().
*/
error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
0);
- if (error || !bp) {
+ if (error == -EAGAIN) {
xfs_ifunlock(ip);
return error;
}
+ if (error)
+ goto corrupt_out;
/*
* First flush out the inode that xfs_iflush was called with.
@@ -3404,7 +3466,8 @@ xfs_iflush(
return 0;
corrupt_out:
- xfs_buf_relse(bp);
+ if (bp)
+ xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
error = -EFSCORRUPTED;