summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_attr.c1639
-rw-r--r--fs/xfs/libxfs/xfs_attr.h198
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c64
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c37
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h6
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h10
-rw-r--r--fs/xfs/libxfs/xfs_defer.c24
-rw-r--r--fs/xfs/libxfs/xfs_defer.h3
-rw-r--r--fs/xfs/libxfs/xfs_errortag.h8
-rw-r--r--fs/xfs/libxfs/xfs_format.h9
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h45
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/scrub/common.c2
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_attr_item.c824
-rw-r--r--fs/xfs/xfs_attr_item.h46
-rw-r--r--fs/xfs/xfs_attr_list.c1
-rw-r--r--fs/xfs/xfs_error.c9
-rw-r--r--fs/xfs/xfs_globals.c1
-rw-r--r--fs/xfs/xfs_ioctl.c4
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_log.c41
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_cil.c35
-rw-r--r--fs/xfs/xfs_log_priv.h34
-rw-r--r--fs/xfs/xfs_log_recover.c2
-rw-r--r--fs/xfs/xfs_ondisk.h2
-rw-r--r--fs/xfs/xfs_sysctl.h1
-rw-r--r--fs/xfs/xfs_sysfs.c24
-rw-r--r--fs/xfs/xfs_trace.h32
-rw-r--r--fs/xfs/xfs_xattr.c2
34 files changed, 2166 insertions, 953 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 04611a1068b4..b056cfc6398e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -102,6 +102,7 @@ xfs-y += xfs_log.o \
xfs_buf_item_recover.o \
xfs_dquot_item_recover.o \
xfs_extfree_item.o \
+ xfs_attr_item.o \
xfs_icreate_item.o \
xfs_inode_item.o \
xfs_inode_item_recover.o \
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 2815cfbbae70..14ae0826bc15 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -24,6 +24,11 @@
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
+#include "xfs_attr_item.h"
+#include "xfs_log.h"
+
+struct kmem_cache *xfs_attri_cache;
+struct kmem_cache *xfs_attrd_cache;
/*
* xfs_attr.c
@@ -53,26 +58,22 @@ STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
*/
STATIC int xfs_attr_node_get(xfs_da_args_t *args);
STATIC void xfs_attr_restore_rmt_blk(struct xfs_da_args *args);
-STATIC int xfs_attr_node_addname(struct xfs_delattr_context *dac);
-STATIC int xfs_attr_node_addname_find_attr(struct xfs_delattr_context *dac);
-STATIC int xfs_attr_node_addname_clear_incomplete(
- struct xfs_delattr_context *dac);
+static int xfs_attr_node_try_addname(struct xfs_attr_item *attr);
+STATIC int xfs_attr_node_addname_find_attr(struct xfs_attr_item *attr);
+STATIC int xfs_attr_node_remove_attr(struct xfs_attr_item *attr);
STATIC int xfs_attr_node_hasname(xfs_da_args_t *args,
struct xfs_da_state **state);
-STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
-STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
-STATIC int xfs_attr_set_iter(struct xfs_delattr_context *dac,
- struct xfs_buf **leaf_bp);
-STATIC int xfs_attr_node_removename(struct xfs_da_args *args,
- struct xfs_da_state *state);
int
xfs_inode_hasattr(
struct xfs_inode *ip)
{
- if (!XFS_IFORK_Q(ip) ||
- (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
- ip->i_afp->if_nextents == 0))
+ if (!XFS_IFORK_Q(ip))
+ return 0;
+ if (!ip->i_afp)
+ return 0;
+ if (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ip->i_afp->if_nextents == 0)
return 0;
return 1;
}
@@ -97,6 +98,123 @@ xfs_attr_is_leaf(
return imap.br_startoff == 0 && imap.br_blockcount == 1;
}
+/*
+ * XXX (dchinner): name path state saving and refilling is an optimisation to
+ * avoid needing to look up name entries after rolling transactions removing
+ * remote xattr blocks between the name entry lookup and name entry removal.
+ * This optimisation got sidelined when combining the set and remove state
+ * machines, but the code has been left in place because it is worthwhile to
+ * restore the optimisation once the combined state machine paths have settled.
+ *
+ * This comment is a public service announcement to remind Future Dave that he
+ * still needs to restore this code to working order.
+ */
+#if 0
+/*
+ * Fill in the disk block numbers in the state structure for the buffers
+ * that are attached to the state structure.
+ * This is done so that we can quickly reattach ourselves to those buffers
+ * after some set of transaction commits have released these buffers.
+ */
+static int
+xfs_attr_fillstate(xfs_da_state_t *state)
+{
+ xfs_da_state_path_t *path;
+ xfs_da_state_blk_t *blk;
+ int level;
+
+ trace_xfs_attr_fillstate(state->args);
+
+ /*
+ * Roll down the "path" in the state structure, storing the on-disk
+ * block number for those buffers in the "path".
+ */
+ path = &state->path;
+ ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+ for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+ if (blk->bp) {
+ blk->disk_blkno = xfs_buf_daddr(blk->bp);
+ blk->bp = NULL;
+ } else {
+ blk->disk_blkno = 0;
+ }
+ }
+
+ /*
+ * Roll down the "altpath" in the state structure, storing the on-disk
+ * block number for those buffers in the "altpath".
+ */
+ path = &state->altpath;
+ ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+ for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+ if (blk->bp) {
+ blk->disk_blkno = xfs_buf_daddr(blk->bp);
+ blk->bp = NULL;
+ } else {
+ blk->disk_blkno = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Reattach the buffers to the state structure based on the disk block
+ * numbers stored in the state structure.
+ * This is done after some set of transaction commits have released those
+ * buffers from our grip.
+ */
+static int
+xfs_attr_refillstate(xfs_da_state_t *state)
+{
+ xfs_da_state_path_t *path;
+ xfs_da_state_blk_t *blk;
+ int level, error;
+
+ trace_xfs_attr_refillstate(state->args);
+
+ /*
+ * Roll down the "path" in the state structure, re-reading each buffer
+ * from the on-disk block number stored for it in the "path".
+ */
+ path = &state->path;
+ ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+ for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+ if (blk->disk_blkno) {
+ error = xfs_da3_node_read_mapped(state->args->trans,
+ state->args->dp, blk->disk_blkno,
+ &blk->bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ } else {
+ blk->bp = NULL;
+ }
+ }
+
+ /*
+ * Roll down the "altpath" in the state structure, re-reading each buffer
+ * from the on-disk block number stored for it in the "altpath".
+ */
+ path = &state->altpath;
+ ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
+ for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
+ if (blk->disk_blkno) {
+ error = xfs_da3_node_read_mapped(state->args->trans,
+ state->args->dp, blk->disk_blkno,
+ &blk->bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ } else {
+ blk->bp = NULL;
+ }
+ }
+
+ return 0;
+}
+#else
+static int xfs_attr_fillstate(xfs_da_state_t *state) { return 0; }
+#endif
+
/*========================================================================
* Overall external interface routines.
*========================================================================*/
@@ -166,7 +284,7 @@ xfs_attr_get(
/*
* Calculate how many blocks we need for the new attribute,
*/
-STATIC int
+int
xfs_attr_calc_size(
struct xfs_da_args *args,
int *local)
@@ -199,6 +317,33 @@ xfs_attr_calc_size(
return nblks;
}
+/* Initialize transaction reservation for attr operations */
+void
+xfs_init_attr_trans(
+ struct xfs_da_args *args,
+ struct xfs_trans_res *tres,
+ unsigned int *total)
+{
+ struct xfs_mount *mp = args->dp->i_mount;
+
+ if (args->value) {
+ tres->tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
+ M_RES(mp)->tr_attrsetrt.tr_logres *
+ args->total;
+ tres->tr_logcount = XFS_ATTRSET_LOG_COUNT;
+ tres->tr_logflags = XFS_TRANS_PERM_LOG_RES;
+ *total = args->total;
+ } else {
+ *tres = M_RES(mp)->tr_attrrm;
+ *total = XFS_ATTRRM_SPACE_RES(mp);
+ }
+}
+
+/*
+ * Add an attr to a shortform fork. If there is no space,
+ * xfs_attr_shortform_addname() will convert to leaf format and return
+ * -ENOSPC.
+ */
STATIC int
xfs_attr_try_sf_addname(
struct xfs_inode *dp,
@@ -230,411 +375,470 @@ xfs_attr_try_sf_addname(
return error;
}
-/*
- * Check to see if the attr should be upgraded from non-existent or shortform to
- * single-leaf-block attribute list.
- */
-static inline bool
-xfs_attr_is_shortform(
- struct xfs_inode *ip)
+static int
+xfs_attr_sf_addname(
+ struct xfs_attr_item *attr)
{
- return ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL ||
- (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
- ip->i_afp->if_nextents == 0);
+ struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_inode *dp = args->dp;
+ int error = 0;
+
+ error = xfs_attr_try_sf_addname(dp, args);
+ if (error != -ENOSPC) {
+ ASSERT(!error || error == -EEXIST);
+ attr->xattri_dela_state = XFS_DAS_DONE;
+ goto out;
+ }
+
+ /*
+ * It won't fit in the shortform, transform to a leaf block. GROT:
+ * another possible req'mt for a double-split btree op.
+ */
+ error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp);
+ if (error)
+ return error;
+
+ /*
+ * Prevent the leaf buffer from being unlocked so that a concurrent AIL
+ * push cannot grab the half-baked leaf buffer and run into problems
+ * with the write verifier.
+ */
+ xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
+ attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
+out:
+ trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
+ return error;
}
/*
- * Checks to see if a delayed attribute transaction should be rolled. If so,
- * transaction is finished or rolled as needed.
+ * Handle the state change on completion of a multi-state attr operation.
+ *
+ * If the XFS_DA_OP_REPLACE flag is set, this means the operation was the first
+ * modification in an attr replace operation and we still have to do the second
+ * state, indicated by @replace_state.
+ *
+ * We consume the XFS_DA_OP_REPLACE flag so that when we are called again on
+ * completion of the second half of the attr replace operation we correctly
+ * signal that it is done.
*/
-STATIC int
-xfs_attr_trans_roll(
- struct xfs_delattr_context *dac)
+static enum xfs_delattr_state
+xfs_attr_complete_op(
+ struct xfs_attr_item *attr,
+ enum xfs_delattr_state replace_state)
{
- struct xfs_da_args *args = dac->da_args;
- int error;
+ struct xfs_da_args *args = attr->xattri_da_args;
+ bool do_replace = args->op_flags & XFS_DA_OP_REPLACE;
+
+ args->op_flags &= ~XFS_DA_OP_REPLACE;
+ if (do_replace) {
+ args->attr_filter &= ~XFS_ATTR_INCOMPLETE;
+ return replace_state;
+ }
+ return XFS_DAS_DONE;
+}
+
+static int
+xfs_attr_leaf_addname(
+ struct xfs_attr_item *attr)
+{
+ struct xfs_da_args *args = attr->xattri_da_args;
+ int error;
+
+ ASSERT(xfs_attr_is_leaf(args->dp));
+
+ /*
+ * Use the leaf buffer we may already hold locked as a result of
+ * a sf-to-leaf conversion. The held buffer is no longer valid
+ * after this call, regardless of the result.
+ */
+ error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp);
+ attr->xattri_leaf_bp = NULL;
+
+ if (error == -ENOSPC) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
+ return error;
- if (dac->flags & XFS_DAC_DEFER_FINISH) {
/*
- * The caller wants us to finish all the deferred ops so that we
- * avoid pinning the log tail with a large number of deferred
- * ops.
+ * We're not in leaf format anymore, so roll the transaction and
+ * retry the add to the newly allocated node block.
*/
- dac->flags &= ~XFS_DAC_DEFER_FINISH;
- error = xfs_defer_finish(&args->trans);
- } else
- error = xfs_trans_roll_inode(&args->trans, args->dp);
+ attr->xattri_dela_state = XFS_DAS_NODE_ADD;
+ goto out;
+ }
+ if (error)
+ return error;
+ /*
+ * We need to commit and roll if we need to allocate remote xattr blocks
+ * or perform more xattr manipulations. Otherwise there is nothing more
+ * to do and we can return success.
+ */
+ if (args->rmtblkno)
+ attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
+ else
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ XFS_DAS_LEAF_REPLACE);
+out:
+ trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
return error;
}
/*
- * Set the attribute specified in @args.
+ * Add an entry to a node format attr tree.
+ *
+ * Note that we might still have a leaf here - xfs_attr_is_leaf() cannot tell
+ * the difference between leaf + remote attr blocks and a node format tree,
+ * so we may still end up having to convert from leaf to node format here.
*/
-int
-xfs_attr_set_args(
- struct xfs_da_args *args)
+static int
+xfs_attr_node_addname(
+ struct xfs_attr_item *attr)
{
- struct xfs_buf *leaf_bp = NULL;
- int error = 0;
- struct xfs_delattr_context dac = {
- .da_args = args,
- };
+ struct xfs_da_args *args = attr->xattri_da_args;
+ int error;
- do {
- error = xfs_attr_set_iter(&dac, &leaf_bp);
- if (error != -EAGAIN)
- break;
+ ASSERT(!attr->xattri_leaf_bp);
+
+ error = xfs_attr_node_addname_find_attr(attr);
+ if (error)
+ return error;
- error = xfs_attr_trans_roll(&dac);
- if (error) {
- if (leaf_bp)
- xfs_trans_brelse(args->trans, leaf_bp);
+ error = xfs_attr_node_try_addname(attr);
+ if (error == -ENOSPC) {
+ error = xfs_attr3_leaf_to_node(args);
+ if (error)
return error;
- }
- } while (true);
+ /*
+ * No state change, we really are in node form now
+ * but we need the transaction rolled to continue.
+ */
+ goto out;
+ }
+ if (error)
+ return error;
+ if (args->rmtblkno)
+ attr->xattri_dela_state = XFS_DAS_NODE_SET_RMT;
+ else
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ XFS_DAS_NODE_REPLACE);
+out:
+ trace_xfs_attr_node_addname_return(attr->xattri_dela_state, args->dp);
return error;
}
-STATIC int
-xfs_attr_sf_addname(
- struct xfs_delattr_context *dac,
- struct xfs_buf **leaf_bp)
+static int
+xfs_attr_rmtval_alloc(
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_inode *dp = args->dp;
+ struct xfs_da_args *args = attr->xattri_da_args;
int error = 0;
/*
- * Try to add the attr to the attribute list in the inode.
+ * If there was an out-of-line value, allocate the blocks we
+ * identified for its storage and copy the value. This is done
+ * after we create the attribute so that we don't overflow the
+ * maximum size of a transaction and/or hit a deadlock.
*/
- error = xfs_attr_try_sf_addname(dp, args);
+ if (attr->xattri_blkcnt > 0) {
+ error = xfs_attr_rmtval_set_blk(attr);
+ if (error)
+ return error;
+ /* Roll the transaction only if there is more to allocate. */
+ if (attr->xattri_blkcnt > 0)
+ goto out;
+ }
- /* Should only be 0, -EEXIST or -ENOSPC */
- if (error != -ENOSPC)
+ error = xfs_attr_rmtval_set_value(args);
+ if (error)
return error;
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ ++attr->xattri_dela_state);
/*
- * It won't fit in the shortform, transform to a leaf block. GROT:
- * another possible req'mt for a double-split btree op.
+ * If we are not doing a rename, we've finished the operation but still
+ * have to clear the incomplete flag protecting the new attr from
+ * exposing partially initialised state if we crash during creation.
*/
- error = xfs_attr_shortform_to_leaf(args, leaf_bp);
- if (error)
- return error;
+ if (attr->xattri_dela_state == XFS_DAS_DONE)
+ error = xfs_attr3_leaf_clearflag(args);
+out:
+ trace_xfs_attr_rmtval_alloc(attr->xattri_dela_state, args->dp);
+ return error;
+}
+
+/*
+ * Mark an attribute entry INCOMPLETE and save pointers to the relevant buffers
+ * for later deletion of the entry.
+ */
+static int
+xfs_attr_leaf_mark_incomplete(
+ struct xfs_da_args *args,
+ struct xfs_da_state *state)
+{
+ int error;
/*
- * Prevent the leaf buffer from being unlocked so that a concurrent AIL
- * push cannot grab the half-baked leaf buffer and run into problems
- * with the write verifier.
+ * Fill in disk block numbers in the state structure
+ * so that we can get the buffers back after we commit
+ * several transactions in the following calls.
*/
- xfs_trans_bhold(args->trans, *leaf_bp);
+ error = xfs_attr_fillstate(state);
+ if (error)
+ return error;
/*
- * We're still in XFS_DAS_UNINIT state here. We've converted
- * the attr fork to leaf format and will restart with the leaf
- * add.
+ * Mark the attribute as INCOMPLETE
*/
- trace_xfs_attr_sf_addname_return(XFS_DAS_UNINIT, args->dp);
- dac->flags |= XFS_DAC_DEFER_FINISH;
- return -EAGAIN;
+ return xfs_attr3_leaf_setflag(args);
}
/*
- * Set the attribute specified in @args.
- * This routine is meant to function as a delayed operation, and may return
- * -EAGAIN when the transaction needs to be rolled. Calling functions will need
- * to handle this, and recall the function until a successful error code is
- * returned.
+ * Initial setup for xfs_attr_node_removename. Make sure the attr is there and
+ * the blocks are valid. Attr keys with remote blocks will be marked
+ * incomplete.
*/
-int
-xfs_attr_set_iter(
- struct xfs_delattr_context *dac,
- struct xfs_buf **leaf_bp)
+static
+int xfs_attr_node_removename_setup(
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_inode *dp = args->dp;
- struct xfs_buf *bp = NULL;
- int forkoff, error = 0;
-
- /* State machine switch */
- switch (dac->dela_state) {
- case XFS_DAS_UNINIT:
- /*
- * If the fork is shortform, attempt to add the attr. If there
- * is no space, this converts to leaf format and returns
- * -EAGAIN with the leaf buffer held across the roll. The caller
- * will deal with a transaction roll error, but otherwise
- * release the hold once we return with a clean transaction.
- */
- if (xfs_attr_is_shortform(dp))
- return xfs_attr_sf_addname(dac, leaf_bp);
- if (*leaf_bp != NULL) {
- xfs_trans_bhold_release(args->trans, *leaf_bp);
- *leaf_bp = NULL;
- }
+ struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_da_state **state = &attr->xattri_da_state;
+ int error;
- if (xfs_attr_is_leaf(dp)) {
- error = xfs_attr_leaf_try_add(args, *leaf_bp);
- if (error == -ENOSPC) {
- error = xfs_attr3_leaf_to_node(args);
- if (error)
- return error;
-
- /*
- * Finish any deferred work items and roll the
- * transaction once more. The goal here is to
- * call node_addname with the inode and
- * transaction in the same state (inode locked
- * and joined, transaction clean) no matter how
- * we got to this step.
- *
- * At this point, we are still in
- * XFS_DAS_UNINIT, but when we come back, we'll
- * be a node, so we'll fall down into the node
- * handling code below
- */
- dac->flags |= XFS_DAC_DEFER_FINISH;
- trace_xfs_attr_set_iter_return(
- dac->dela_state, args->dp);
- return -EAGAIN;
- } else if (error) {
- return error;
- }
+ error = xfs_attr_node_hasname(args, state);
+ if (error != -EEXIST)
+ goto out;
+ error = 0;
- dac->dela_state = XFS_DAS_FOUND_LBLK;
- } else {
- error = xfs_attr_node_addname_find_attr(dac);
- if (error)
- return error;
+ ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
+ ASSERT((*state)->path.blk[(*state)->path.active - 1].magic ==
+ XFS_ATTR_LEAF_MAGIC);
- error = xfs_attr_node_addname(dac);
- if (error)
- return error;
+ error = xfs_attr_leaf_mark_incomplete(args, *state);
+ if (error)
+ goto out;
+ if (args->rmtblkno > 0)
+ error = xfs_attr_rmtval_invalidate(args);
+out:
+ if (error)
+ xfs_da_state_free(*state);
- dac->dela_state = XFS_DAS_FOUND_NBLK;
- }
- trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
- case XFS_DAS_FOUND_LBLK:
- /*
- * If there was an out-of-line value, allocate the blocks we
- * identified for its storage and copy the value. This is done
- * after we create the attribute so that we don't overflow the
- * maximum size of a transaction and/or hit a deadlock.
- */
+ return error;
+}
- /* Open coded xfs_attr_rmtval_set without trans handling */
- if ((dac->flags & XFS_DAC_LEAF_ADDNAME_INIT) == 0) {
- dac->flags |= XFS_DAC_LEAF_ADDNAME_INIT;
- if (args->rmtblkno > 0) {
- error = xfs_attr_rmtval_find_space(dac);
- if (error)
- return error;
- }
- }
+/*
+ * Remove the original attr we have just replaced. This is dependent on the
+ * original lookup and insert placing the old attr in args->blkno/args->index
+ * and the new attr in args->blkno2/args->index2.
+ */
+static int
+xfs_attr_leaf_remove_attr(
+ struct xfs_attr_item *attr)
+{
+ struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_buf *bp = NULL;
+ int forkoff;
+ int error;
- /*
- * Repeat allocating remote blocks for the attr value until
- * blkcnt drops to zero.
- */
- if (dac->blkcnt > 0) {
- error = xfs_attr_rmtval_set_blk(dac);
- if (error)
- return error;
- trace_xfs_attr_set_iter_return(dac->dela_state,
- args->dp);
- return -EAGAIN;
- }
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
+ &bp);
+ if (error)
+ return error;
- error = xfs_attr_rmtval_set_value(args);
- if (error)
- return error;
+ xfs_attr3_leaf_remove(bp, args);
- /*
- * If this is not a rename, clear the incomplete flag and we're
- * done.
- */
- if (!(args->op_flags & XFS_DA_OP_RENAME)) {
- if (args->rmtblkno > 0)
- error = xfs_attr3_leaf_clearflag(args);
- return error;
- }
+ forkoff = xfs_attr_shortform_allfit(bp, dp);
+ if (forkoff)
+ error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
+ /* bp is gone due to xfs_da_shrink_inode */
- /*
- * If this is an atomic rename operation, we must "flip" the
- * incomplete flags on the "new" and "old" attribute/value pairs
- * so that one disappears and one appears atomically. Then we
- * must remove the "old" attribute/value pair.
- *
- * In a separate transaction, set the incomplete flag on the
- * "old" attr and clear the incomplete flag on the "new" attr.
- */
- error = xfs_attr3_leaf_flipflags(args);
- if (error)
- return error;
- /*
- * Commit the flag value change and start the next trans in
- * series.
- */
- dac->dela_state = XFS_DAS_FLIP_LFLAG;
- trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
- case XFS_DAS_FLIP_LFLAG:
- /*
- * Dismantle the "old" attribute/value pair by removing a
- * "remote" value (if it exists).
- */
- xfs_attr_restore_rmt_blk(args);
- error = xfs_attr_rmtval_invalidate(args);
- if (error)
- return error;
+ return error;
+}
- fallthrough;
- case XFS_DAS_RM_LBLK:
- /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
- dac->dela_state = XFS_DAS_RM_LBLK;
- if (args->rmtblkno) {
- error = xfs_attr_rmtval_remove(dac);
- if (error == -EAGAIN)
- trace_xfs_attr_set_iter_return(
- dac->dela_state, args->dp);
- if (error)
- return error;
+/*
+ * Shrink an attribute from leaf to shortform. Used by the node format remove
+ * path when the node format collapses to a single block and so we have to check
+ * if it can be collapsed further.
+ */
+static int
+xfs_attr_leaf_shrink(
+ struct xfs_da_args *args)
+{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_buf *bp;
+ int forkoff;
+ int error;
- dac->dela_state = XFS_DAS_RD_LEAF;
- trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
- }
+ if (!xfs_attr_is_leaf(dp))
+ return 0;
- fallthrough;
- case XFS_DAS_RD_LEAF:
- /*
- * This is the last step for leaf format. Read the block with
- * the old attr, remove the old attr, check for shortform
- * conversion and return.
- */
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
- &bp);
- if (error)
- return error;
+ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ if (error)
+ return error;
- xfs_attr3_leaf_remove(bp, args);
+ forkoff = xfs_attr_shortform_allfit(bp, dp);
+ if (forkoff) {
+ error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
+ /* bp is gone due to xfs_da_shrink_inode */
+ } else {
+ xfs_trans_brelse(args->trans, bp);
+ }
- forkoff = xfs_attr_shortform_allfit(bp, dp);
- if (forkoff)
- error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
- /* bp is gone due to xfs_da_shrink_inode */
+ return error;
+}
- return error;
+/*
+ * Run the attribute operation specified in @attr.
+ *
+ * This routine is meant to function as a delayed operation and will set the
+ * state to XFS_DAS_DONE when the operation is complete. Calling functions will
+ * need to handle this, and recall the function until either an error or
+ * XFS_DAS_DONE is detected.
+ */
+int
+xfs_attr_set_iter(
+ struct xfs_attr_item *attr)
+{
+ struct xfs_da_args *args = attr->xattri_da_args;
+ int error = 0;
- case XFS_DAS_FOUND_NBLK:
- /*
- * Find space for remote blocks and fall into the allocation
- * state.
- */
- if (args->rmtblkno > 0) {
- error = xfs_attr_rmtval_find_space(dac);
- if (error)
- return error;
+ /* State machine switch */
+next_state:
+ switch (attr->xattri_dela_state) {
+ case XFS_DAS_UNINIT:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ case XFS_DAS_SF_ADD:
+ return xfs_attr_sf_addname(attr);
+ case XFS_DAS_LEAF_ADD:
+ return xfs_attr_leaf_addname(attr);
+ case XFS_DAS_NODE_ADD:
+ return xfs_attr_node_addname(attr);
+
+ case XFS_DAS_SF_REMOVE:
+ error = xfs_attr_sf_removename(args);
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ xfs_attr_init_add_state(args));
+ break;
+ case XFS_DAS_LEAF_REMOVE:
+ error = xfs_attr_leaf_removename(args);
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ xfs_attr_init_add_state(args));
+ break;
+ case XFS_DAS_NODE_REMOVE:
+ error = xfs_attr_node_removename_setup(attr);
+ if (error == -ENOATTR &&
+ (args->op_flags & XFS_DA_OP_RECOVERY)) {
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ xfs_attr_init_add_state(args));
+ error = 0;
+ break;
}
+ if (error)
+ return error;
+ attr->xattri_dela_state = XFS_DAS_NODE_REMOVE_RMT;
+ if (args->rmtblkno == 0)
+ attr->xattri_dela_state++;
+ break;
+ case XFS_DAS_LEAF_SET_RMT:
+ case XFS_DAS_NODE_SET_RMT:
+ error = xfs_attr_rmtval_find_space(attr);
+ if (error)
+ return error;
+ attr->xattri_dela_state++;
fallthrough;
- case XFS_DAS_ALLOC_NODE:
- /*
- * If there was an out-of-line value, allocate the blocks we
- * identified for its storage and copy the value. This is done
- * after we create the attribute so that we don't overflow the
- * maximum size of a transaction and/or hit a deadlock.
- */
- dac->dela_state = XFS_DAS_ALLOC_NODE;
- if (args->rmtblkno > 0) {
- if (dac->blkcnt > 0) {
- error = xfs_attr_rmtval_set_blk(dac);
- if (error)
- return error;
- trace_xfs_attr_set_iter_return(
- dac->dela_state, args->dp);
- return -EAGAIN;
- }
-
- error = xfs_attr_rmtval_set_value(args);
- if (error)
- return error;
- }
- /*
- * If this was not a rename, clear the incomplete flag and we're
- * done.
- */
- if (!(args->op_flags & XFS_DA_OP_RENAME)) {
- if (args->rmtblkno > 0)
- error = xfs_attr3_leaf_clearflag(args);
- goto out;
- }
+ case XFS_DAS_LEAF_ALLOC_RMT:
+ case XFS_DAS_NODE_ALLOC_RMT:
+ error = xfs_attr_rmtval_alloc(attr);
+ if (error)
+ return error;
+ if (attr->xattri_dela_state == XFS_DAS_DONE)
+ break;
+ goto next_state;
+ case XFS_DAS_LEAF_REPLACE:
+ case XFS_DAS_NODE_REPLACE:
/*
- * If this is an atomic rename operation, we must "flip" the
- * incomplete flags on the "new" and "old" attribute/value pairs
- * so that one disappears and one appears atomically. Then we
- * must remove the "old" attribute/value pair.
- *
- * In a separate transaction, set the incomplete flag on the
- * "old" attr and clear the incomplete flag on the "new" attr.
+ * We must "flip" the incomplete flags on the "new" and "old"
+ * attribute/value pairs so that one disappears and one appears
+ * atomically.
*/
error = xfs_attr3_leaf_flipflags(args);
if (error)
- goto out;
+ return error;
/*
- * Commit the flag value change and start the next trans in
- * series
+ * We must commit the flag value change now to make it atomic
+ * and then we can start the next trans in series at REMOVE_OLD.
*/
- dac->dela_state = XFS_DAS_FLIP_NFLAG;
- trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
+ attr->xattri_dela_state++;
+ break;
- case XFS_DAS_FLIP_NFLAG:
+ case XFS_DAS_LEAF_REMOVE_OLD:
+ case XFS_DAS_NODE_REMOVE_OLD:
/*
- * Dismantle the "old" attribute/value pair by removing a
- * "remote" value (if it exists).
+ * If we have a remote attr, start the process of removing it
+ * by invalidating any cached buffers.
+ *
+ * If we don't have a remote attr, we skip the remote block
+ * removal state altogether with a second state increment.
*/
xfs_attr_restore_rmt_blk(args);
-
- error = xfs_attr_rmtval_invalidate(args);
- if (error)
- return error;
-
- fallthrough;
- case XFS_DAS_RM_NBLK:
- /* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
- dac->dela_state = XFS_DAS_RM_NBLK;
if (args->rmtblkno) {
- error = xfs_attr_rmtval_remove(dac);
- if (error == -EAGAIN)
- trace_xfs_attr_set_iter_return(
- dac->dela_state, args->dp);
-
+ error = xfs_attr_rmtval_invalidate(args);
if (error)
return error;
+ } else {
+ attr->xattri_dela_state++;
+ }
+
+ attr->xattri_dela_state++;
+ goto next_state;
- dac->dela_state = XFS_DAS_CLR_FLAG;
- trace_xfs_attr_set_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
+ case XFS_DAS_LEAF_REMOVE_RMT:
+ case XFS_DAS_NODE_REMOVE_RMT:
+ error = xfs_attr_rmtval_remove(attr);
+ if (error == -EAGAIN) {
+ error = 0;
+ break;
}
+ if (error)
+ return error;
- fallthrough;
- case XFS_DAS_CLR_FLAG:
/*
- * The last state for node format. Look up the old attr and
- * remove it.
+ * We've finished removing the remote attr blocks, so commit the
+ * transaction and move on to removing the attr name from the
+ * leaf/node block. Removing the attr might require a full
+ * transaction reservation for btree block freeing, so we
+ * can't do that in the same transaction where we removed the
+ * remote attr blocks.
*/
- error = xfs_attr_node_addname_clear_incomplete(dac);
+ attr->xattri_dela_state++;
+ break;
+
+ case XFS_DAS_LEAF_REMOVE_ATTR:
+ error = xfs_attr_leaf_remove_attr(attr);
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ xfs_attr_init_add_state(args));
+ break;
+
+ case XFS_DAS_NODE_REMOVE_ATTR:
+ error = xfs_attr_node_remove_attr(attr);
+ if (!error)
+ error = xfs_attr_leaf_shrink(args);
+ attr->xattri_dela_state = xfs_attr_complete_op(attr,
+ xfs_attr_init_add_state(args));
break;
default:
ASSERT(0);
break;
}
-out:
+
+ trace_xfs_attr_set_iter_return(attr->xattri_dela_state, args->dp);
return error;
}
@@ -668,30 +872,79 @@ xfs_attr_lookup(
return xfs_attr_node_hasname(args, NULL);
}
-/*
- * Remove the attribute specified in @args.
- */
-int
-xfs_attr_remove_args(
+static int
+xfs_attr_item_init(
+ struct xfs_da_args *args,
+ unsigned int op_flags, /* op flag (set or remove) */
+ struct xfs_attr_item **attr) /* new xfs_attr_item */
+{
+
+ struct xfs_attr_item *new;
+
+ new = kmem_zalloc(sizeof(struct xfs_attr_item), KM_NOFS);
+ new->xattri_op_flags = op_flags;
+ new->xattri_da_args = args;
+
+ *attr = new;
+ return 0;
+}
+
+/* Sets an attribute for an inode as a deferred operation */
+static int
+xfs_attr_defer_add(
struct xfs_da_args *args)
{
- int error;
- struct xfs_delattr_context dac = {
- .da_args = args,
- };
+ struct xfs_attr_item *new;
+ int error = 0;
- do {
- error = xfs_attr_remove_iter(&dac);
- if (error != -EAGAIN)
- break;
+ error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_SET, &new);
+ if (error)
+ return error;
- error = xfs_attr_trans_roll(&dac);
- if (error)
- return error;
+ new->xattri_dela_state = xfs_attr_init_add_state(args);
+ xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
+ trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp);
- } while (true);
+ return 0;
+}
- return error;
+/* Replaces an attribute for an inode as a deferred operation */
+static int
+xfs_attr_defer_replace(
+ struct xfs_da_args *args)
+{
+ struct xfs_attr_item *new;
+ int error = 0;
+
+ error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REPLACE, &new);
+ if (error)
+ return error;
+
+ new->xattri_dela_state = xfs_attr_init_replace_state(args);
+ xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
+ trace_xfs_attr_defer_replace(new->xattri_dela_state, args->dp);
+
+ return 0;
+}
+
+/* Removes an attribute for an inode as a deferred operation */
+static int
+xfs_attr_defer_remove(
+ struct xfs_da_args *args)
+{
+
+ struct xfs_attr_item *new;
+ int error;
+
+ error = xfs_attr_item_init(args, XFS_ATTR_OP_FLAGS_REMOVE, &new);
+ if (error)
+ return error;
+
+ new->xattri_dela_state = xfs_attr_init_remove_state(args);
+ xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list);
+ trace_xfs_attr_defer_remove(new->xattri_dela_state, args->dp);
+
+ return 0;
}
/*
@@ -709,6 +962,7 @@ xfs_attr_set(
int error, local;
int rmt_blks = 0;
unsigned int total;
+ int delayed = xfs_has_larp(mp);
if (xfs_is_shutdown(dp->i_mount))
return -EIO;
@@ -730,8 +984,6 @@ xfs_attr_set(
if (args->value) {
XFS_STATS_INC(mp, xs_attr_set);
-
- args->op_flags |= XFS_DA_OP_ADDNAME;
args->total = xfs_attr_calc_size(args, &local);
/*
@@ -748,30 +1000,27 @@ xfs_attr_set(
return error;
}
- tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres +
- M_RES(mp)->tr_attrsetrt.tr_logres *
- args->total;
- tres.tr_logcount = XFS_ATTRSET_LOG_COUNT;
- tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
- total = args->total;
-
if (!local)
rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen);
} else {
XFS_STATS_INC(mp, xs_attr_remove);
-
- tres = M_RES(mp)->tr_attrrm;
- total = XFS_ATTRRM_SPACE_RES(mp);
rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
}
+ if (delayed) {
+ error = xfs_attr_use_log_assist(mp);
+ if (error)
+ return error;
+ }
+
/*
* Root fork attributes can use reserved data blocks for this
* operation if necessary
*/
+ xfs_init_attr_trans(args, &tres, &total);
error = xfs_trans_alloc_inode(dp, &tres, total, 0, rsvd, &args->trans);
if (error)
- return error;
+ goto drop_incompat;
if (args->value || xfs_inode_hasattr(dp)) {
error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK,
@@ -784,28 +1033,35 @@ xfs_attr_set(
}
error = xfs_attr_lookup(args);
- if (args->value) {
- if (error == -EEXIST && (args->attr_flags & XATTR_CREATE))
- goto out_trans_cancel;
- if (error == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
- goto out_trans_cancel;
- if (error != -ENOATTR && error != -EEXIST)
+ switch (error) {
+ case -EEXIST:
+ /* if no value, we are performing a remove operation */
+ if (!args->value) {
+ error = xfs_attr_defer_remove(args);
+ break;
+ }
+ /* Pure create fails if the attr already exists */
+ if (args->attr_flags & XATTR_CREATE)
goto out_trans_cancel;
- error = xfs_attr_set_args(args);
- if (error)
- goto out_trans_cancel;
- /* shortform attribute has already been committed */
- if (!args->trans)
- goto out_unlock;
- } else {
- if (error != -EEXIST)
+ error = xfs_attr_defer_replace(args);
+ break;
+ case -ENOATTR:
+ /* Can't remove what isn't there. */
+ if (!args->value)
goto out_trans_cancel;
- error = xfs_attr_remove_args(args);
- if (error)
+ /* Pure replace fails if no existing attr to replace. */
+ if (args->attr_flags & XATTR_REPLACE)
goto out_trans_cancel;
+
+ error = xfs_attr_defer_add(args);
+ break;
+ default:
+ goto out_trans_cancel;
}
+ if (error)
+ goto out_trans_cancel;
/*
* If this is a synchronous mount, make sure that the
@@ -824,6 +1080,9 @@ xfs_attr_set(
error = xfs_trans_commit(args->trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL);
+drop_incompat:
+ if (delayed)
+ xlog_drop_incompat_feat(mp->m_log);
return error;
out_trans_cancel:
@@ -832,6 +1091,40 @@ out_trans_cancel:
goto out_unlock;
}
+int __init
+xfs_attri_init_cache(void)
+{
+ xfs_attri_cache = kmem_cache_create("xfs_attri",
+ sizeof(struct xfs_attri_log_item),
+ 0, 0, NULL);
+
+ return xfs_attri_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_attri_destroy_cache(void)
+{
+ kmem_cache_destroy(xfs_attri_cache);
+ xfs_attri_cache = NULL;
+}
+
+int __init
+xfs_attrd_init_cache(void)
+{
+ xfs_attrd_cache = kmem_cache_create("xfs_attrd",
+ sizeof(struct xfs_attrd_log_item),
+ 0, 0, NULL);
+
+ return xfs_attrd_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_attrd_destroy_cache(void)
+{
+ kmem_cache_destroy(xfs_attrd_cache);
+ xfs_attrd_cache = NULL;
+}
+
/*========================================================================
* External routines when attribute list is inside the inode
*========================================================================*/
@@ -848,28 +1141,41 @@ static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
* Add a name to the shortform attribute list structure
* This is the external routine.
*/
-STATIC int
-xfs_attr_shortform_addname(xfs_da_args_t *args)
+static int
+xfs_attr_shortform_addname(
+ struct xfs_da_args *args)
{
- int newsize, forkoff, retval;
+ int newsize, forkoff;
+ int error;
trace_xfs_attr_sf_addname(args);
- retval = xfs_attr_shortform_lookup(args);
- if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
- return retval;
- if (retval == -EEXIST) {
- if (args->attr_flags & XATTR_CREATE)
- return retval;
- retval = xfs_attr_sf_removename(args);
- if (retval)
- return retval;
+ error = xfs_attr_shortform_lookup(args);
+ switch (error) {
+ case -ENOATTR:
+ if (args->op_flags & XFS_DA_OP_REPLACE)
+ return error;
+ break;
+ case -EEXIST:
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
+ return error;
+
+ error = xfs_attr_sf_removename(args);
+ if (error)
+ return error;
+
/*
- * Since we have removed the old attr, clear ATTR_REPLACE so
- * that the leaf format add routine won't trip over the attr
- * not being around.
+ * Since we have removed the old attr, clear XFS_DA_OP_REPLACE
+ * so that if the new attr doesn't fit in shortform format, the
+ * leaf format add routine won't trip over the attr not being
+ * around.
*/
- args->attr_flags &= ~XATTR_REPLACE;
+ args->op_flags &= ~XFS_DA_OP_REPLACE;
+ break;
+ case 0:
+ break;
+ default:
+ return error;
}
if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
@@ -892,8 +1198,8 @@ xfs_attr_shortform_addname(xfs_da_args_t *args)
* External routines when attribute list is one block
*========================================================================*/
-/* Store info about a remote block */
-STATIC void
+/* Save the current remote block info and clear the current pointers. */
+static void
xfs_attr_save_rmt_blk(
struct xfs_da_args *args)
{
@@ -902,10 +1208,13 @@ xfs_attr_save_rmt_blk(
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
args->rmtvaluelen2 = args->rmtvaluelen;
+ args->rmtblkno = 0;
+ args->rmtblkcnt = 0;
+ args->rmtvaluelen = 0;
}
/* Set stored info about a remote block */
-STATIC void
+static void
xfs_attr_restore_rmt_blk(
struct xfs_da_args *args)
{
@@ -931,45 +1240,54 @@ xfs_attr_leaf_try_add(
struct xfs_da_args *args,
struct xfs_buf *bp)
{
- int retval;
+ int error;
/*
- * Look up the given attribute in the leaf block. Figure out if
- * the given flags produce an error or call for an atomic rename.
+ * If the caller provided a buffer to us, it is locked and held in
+ * the transaction because it just did a shortform to leaf conversion.
+ * Hence we don't need to read it again. Otherwise read in the leaf
+ * buffer.
*/
- retval = xfs_attr_leaf_hasname(args, &bp);
- if (retval != -ENOATTR && retval != -EEXIST)
- return retval;
- if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
- goto out_brelse;
- if (retval == -EEXIST) {
- if (args->attr_flags & XATTR_CREATE)
+ if (bp) {
+ xfs_trans_bhold_release(args->trans, bp);
+ } else {
+ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Look up the xattr name to set the insertion point for the new xattr.
+ */
+ error = xfs_attr3_leaf_lookup_int(bp, args);
+ switch (error) {
+ case -ENOATTR:
+ if (args->op_flags & XFS_DA_OP_REPLACE)
+ goto out_brelse;
+ break;
+ case -EEXIST:
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
goto out_brelse;
trace_xfs_attr_leaf_replace(args);
-
- /* save the attribute state for later removal*/
- args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
- xfs_attr_save_rmt_blk(args);
-
/*
- * clear the remote attr state now that it is saved so that the
- * values reflect the state of the attribute we are about to
+ * Save the existing remote attr state so that the current
+ * values reflect the state of the new attribute we are about to
* add, not the attribute we just found and will remove later.
*/
- args->rmtblkno = 0;
- args->rmtblkcnt = 0;
- args->rmtvaluelen = 0;
+ xfs_attr_save_rmt_blk(args);
+ break;
+ case 0:
+ break;
+ default:
+ goto out_brelse;
}
- /*
- * Add the attribute to the leaf block
- */
return xfs_attr3_leaf_add(bp, args);
out_brelse:
xfs_trans_brelse(args->trans, bp);
- return retval;
+ return error;
}
/*
@@ -1015,9 +1333,10 @@ xfs_attr_leaf_removename(
dp = args->dp;
error = xfs_attr_leaf_hasname(args, &bp);
-
if (error == -ENOATTR) {
xfs_trans_brelse(args->trans, bp);
+ if (args->op_flags & XFS_DA_OP_RECOVERY)
+ return 0;
return error;
} else if (error != -EEXIST)
return error;
@@ -1101,46 +1420,45 @@ xfs_attr_node_hasname(
STATIC int
xfs_attr_node_addname_find_attr(
- struct xfs_delattr_context *dac)
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
- int retval;
+ struct xfs_da_args *args = attr->xattri_da_args;
+ int error;
/*
* Search to see if name already exists, and get back a pointer
* to where it should go.
*/
- retval = xfs_attr_node_hasname(args, &dac->da_state);
- if (retval != -ENOATTR && retval != -EEXIST)
- goto error;
-
- if (retval == -ENOATTR && (args->attr_flags & XATTR_REPLACE))
- goto error;
- if (retval == -EEXIST) {
- if (args->attr_flags & XATTR_CREATE)
+ error = xfs_attr_node_hasname(args, &attr->xattri_da_state);
+ switch (error) {
+ case -ENOATTR:
+ if (args->op_flags & XFS_DA_OP_REPLACE)
+ goto error;
+ break;
+ case -EEXIST:
+ if (!(args->op_flags & XFS_DA_OP_REPLACE))
goto error;
- trace_xfs_attr_node_replace(args);
-
- /* save the attribute state for later removal*/
- args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
- xfs_attr_save_rmt_blk(args);
+ trace_xfs_attr_node_replace(args);
/*
- * clear the remote attr state now that it is saved so that the
- * values reflect the state of the attribute we are about to
+ * Save the existing remote attr state so that the current
+ * values reflect the state of the new attribute we are about to
* add, not the attribute we just found and will remove later.
*/
- args->rmtblkno = 0;
- args->rmtblkcnt = 0;
- args->rmtvaluelen = 0;
+ xfs_attr_save_rmt_blk(args);
+ break;
+ case 0:
+ break;
+ default:
+ goto error;
}
return 0;
error:
- if (dac->da_state)
- xfs_da_state_free(dac->da_state);
- return retval;
+ if (attr->xattri_da_state)
+ xfs_da_state_free(attr->xattri_da_state);
+ return error;
}
/*
@@ -1149,21 +1467,13 @@ error:
* This will involve walking down the Btree, and may involve splitting
* leaf nodes and even splitting intermediate nodes up to and including
* the root node (a special case of an intermediate node).
- *
- * "Remote" attribute values confuse the issue and atomic rename operations
- * add a whole extra layer of confusion on top of that.
- *
- * This routine is meant to function as a delayed operation, and may return
- * -EAGAIN when the transaction needs to be rolled. Calling functions will need
- * to handle this, and recall the function until a successful error code is
- *returned.
*/
-STATIC int
-xfs_attr_node_addname(
- struct xfs_delattr_context *dac)
+static int
+xfs_attr_node_try_addname(
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_da_state *state = dac->da_state;
+ struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
int error;
@@ -1178,25 +1488,9 @@ xfs_attr_node_addname(
/*
* Its really a single leaf node, but it had
* out-of-line values so it looked like it *might*
- * have been a b-tree.
+ * have been a b-tree. Let the caller deal with this.
*/
- xfs_da_state_free(state);
- state = NULL;
- error = xfs_attr3_leaf_to_node(args);
- if (error)
- goto out;
-
- /*
- * Now that we have converted the leaf to a node, we can
- * roll the transaction, and try xfs_attr3_leaf_add
- * again on re-entry. No need to set dela_state to do
- * this. dela_state is still unset by this function at
- * this point.
- */
- dac->flags |= XFS_DAC_DEFER_FINISH;
- trace_xfs_attr_node_addname_return(
- dac->dela_state, args->dp);
- return -EAGAIN;
+ goto out;
}
/*
@@ -1208,7 +1502,6 @@ xfs_attr_node_addname(
error = xfs_da3_split(state);
if (error)
goto out;
- dac->flags |= XFS_DAC_DEFER_FINISH;
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1217,24 +1510,42 @@ xfs_attr_node_addname(
}
out:
- if (state)
- xfs_da_state_free(state);
+ xfs_da_state_free(state);
return error;
}
+static int
+xfs_attr_node_removename(
+ struct xfs_da_args *args,
+ struct xfs_da_state *state)
+{
+ struct xfs_da_state_blk *blk;
+ int retval;
-STATIC int
-xfs_attr_node_addname_clear_incomplete(
- struct xfs_delattr_context *dac)
+ /*
+ * Remove the name and update the hashvals in the tree.
+ */
+ blk = &state->path.blk[state->path.active-1];
+ ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+ retval = xfs_attr3_leaf_remove(blk->bp, args);
+ xfs_da3_fixhashpath(state, &state->path);
+
+ return retval;
+}
+
+static int
+xfs_attr_node_remove_attr(
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
+ struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_da_state *state = NULL;
int retval = 0;
int error = 0;
/*
- * Re-find the "old" attribute entry after any split ops. The INCOMPLETE
- * flag means that we will find the "old" attr, not the "new" one.
+ * The attr we are removing has already been marked incomplete, so
+ * we need to set the filter appropriately to re-find the "old"
+ * attribute entry after any split ops.
*/
args->attr_filter |= XFS_ATTR_INCOMPLETE;
state = xfs_da_state_alloc(args);
@@ -1264,362 +1575,6 @@ out:
}
/*
- * Shrink an attribute from leaf to shortform
- */
-STATIC int
-xfs_attr_node_shrink(
- struct xfs_da_args *args,
- struct xfs_da_state *state)
-{
- struct xfs_inode *dp = args->dp;
- int error, forkoff;
- struct xfs_buf *bp;
-
- /*
- * Have to get rid of the copy of this dabuf in the state.
- */
- ASSERT(state->path.active == 1);
- ASSERT(state->path.blk[0].bp);
- state->path.blk[0].bp = NULL;
-
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
- if (error)
- return error;
-
- forkoff = xfs_attr_shortform_allfit(bp, dp);
- if (forkoff) {
- error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
- /* bp is gone due to xfs_da_shrink_inode */
- } else
- xfs_trans_brelse(args->trans, bp);
-
- return error;
-}
-
-/*
- * Mark an attribute entry INCOMPLETE and save pointers to the relevant buffers
- * for later deletion of the entry.
- */
-STATIC int
-xfs_attr_leaf_mark_incomplete(
- struct xfs_da_args *args,
- struct xfs_da_state *state)
-{
- int error;
-
- /*
- * Fill in disk block numbers in the state structure
- * so that we can get the buffers back after we commit
- * several transactions in the following calls.
- */
- error = xfs_attr_fillstate(state);
- if (error)
- return error;
-
- /*
- * Mark the attribute as INCOMPLETE
- */
- return xfs_attr3_leaf_setflag(args);
-}
-
-/*
- * Initial setup for xfs_attr_node_removename. Make sure the attr is there and
- * the blocks are valid. Attr keys with remote blocks will be marked
- * incomplete.
- */
-STATIC
-int xfs_attr_node_removename_setup(
- struct xfs_delattr_context *dac)
-{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_da_state **state = &dac->da_state;
- int error;
-
- error = xfs_attr_node_hasname(args, state);
- if (error != -EEXIST)
- goto out;
- error = 0;
-
- ASSERT((*state)->path.blk[(*state)->path.active - 1].bp != NULL);
- ASSERT((*state)->path.blk[(*state)->path.active - 1].magic ==
- XFS_ATTR_LEAF_MAGIC);
-
- if (args->rmtblkno > 0) {
- error = xfs_attr_leaf_mark_incomplete(args, *state);
- if (error)
- goto out;
-
- error = xfs_attr_rmtval_invalidate(args);
- }
-out:
- if (error)
- xfs_da_state_free(*state);
-
- return error;
-}
-
-STATIC int
-xfs_attr_node_removename(
- struct xfs_da_args *args,
- struct xfs_da_state *state)
-{
- struct xfs_da_state_blk *blk;
- int retval;
-
- /*
- * Remove the name and update the hashvals in the tree.
- */
- blk = &state->path.blk[state->path.active-1];
- ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
- retval = xfs_attr3_leaf_remove(blk->bp, args);
- xfs_da3_fixhashpath(state, &state->path);
-
- return retval;
-}
-
-/*
- * Remove the attribute specified in @args.
- *
- * This will involve walking down the Btree, and may involve joining
- * leaf nodes and even joining intermediate nodes up to and including
- * the root node (a special case of an intermediate node).
- *
- * This routine is meant to function as either an in-line or delayed operation,
- * and may return -EAGAIN when the transaction needs to be rolled. Calling
- * functions will need to handle this, and call the function until a
- * successful error code is returned.
- */
-int
-xfs_attr_remove_iter(
- struct xfs_delattr_context *dac)
-{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_da_state *state = dac->da_state;
- int retval, error = 0;
- struct xfs_inode *dp = args->dp;
-
- trace_xfs_attr_node_removename(args);
-
- switch (dac->dela_state) {
- case XFS_DAS_UNINIT:
- if (!xfs_inode_hasattr(dp))
- return -ENOATTR;
-
- /*
- * Shortform or leaf formats don't require transaction rolls and
- * thus state transitions. Call the right helper and return.
- */
- if (dp->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
- return xfs_attr_sf_removename(args);
-
- if (xfs_attr_is_leaf(dp))
- return xfs_attr_leaf_removename(args);
-
- /*
- * Node format may require transaction rolls. Set up the
- * state context and fall into the state machine.
- */
- if (!dac->da_state) {
- error = xfs_attr_node_removename_setup(dac);
- if (error)
- return error;
- state = dac->da_state;
- }
-
- fallthrough;
- case XFS_DAS_RMTBLK:
- dac->dela_state = XFS_DAS_RMTBLK;
-
- /*
- * If there is an out-of-line value, de-allocate the blocks.
- * This is done before we remove the attribute so that we don't
- * overflow the maximum size of a transaction and/or hit a
- * deadlock.
- */
- if (args->rmtblkno > 0) {
- /*
- * May return -EAGAIN. Roll and repeat until all remote
- * blocks are removed.
- */
- error = xfs_attr_rmtval_remove(dac);
- if (error == -EAGAIN) {
- trace_xfs_attr_remove_iter_return(
- dac->dela_state, args->dp);
- return error;
- } else if (error) {
- goto out;
- }
-
- /*
- * Refill the state structure with buffers (the prior
- * calls released our buffers) and close out this
- * transaction before proceeding.
- */
- ASSERT(args->rmtblkno == 0);
- error = xfs_attr_refillstate(state);
- if (error)
- goto out;
- dac->dela_state = XFS_DAS_RM_NAME;
- dac->flags |= XFS_DAC_DEFER_FINISH;
- trace_xfs_attr_remove_iter_return(dac->dela_state, args->dp);
- return -EAGAIN;
- }
-
- fallthrough;
- case XFS_DAS_RM_NAME:
- /*
- * If we came here fresh from a transaction roll, reattach all
- * the buffers to the current transaction.
- */
- if (dac->dela_state == XFS_DAS_RM_NAME) {
- error = xfs_attr_refillstate(state);
- if (error)
- goto out;
- }
-
- retval = xfs_attr_node_removename(args, state);
-
- /*
- * Check to see if the tree needs to be collapsed. If so, roll
- * the transacton and fall into the shrink state.
- */
- if (retval && (state->path.active > 1)) {
- error = xfs_da3_join(state);
- if (error)
- goto out;
-
- dac->flags |= XFS_DAC_DEFER_FINISH;
- dac->dela_state = XFS_DAS_RM_SHRINK;
- trace_xfs_attr_remove_iter_return(
- dac->dela_state, args->dp);
- return -EAGAIN;
- }
-
- fallthrough;
- case XFS_DAS_RM_SHRINK:
- /*
- * If the result is small enough, push it all into the inode.
- * This is our final state so it's safe to return a dirty
- * transaction.
- */
- if (xfs_attr_is_leaf(dp))
- error = xfs_attr_node_shrink(args, state);
- ASSERT(error != -EAGAIN);
- break;
- default:
- ASSERT(0);
- error = -EINVAL;
- goto out;
- }
-out:
- if (state)
- xfs_da_state_free(state);
- return error;
-}
-
-/*
- * Fill in the disk block numbers in the state structure for the buffers
- * that are attached to the state structure.
- * This is done so that we can quickly reattach ourselves to those buffers
- * after some set of transaction commits have released these buffers.
- */
-STATIC int
-xfs_attr_fillstate(xfs_da_state_t *state)
-{
- xfs_da_state_path_t *path;
- xfs_da_state_blk_t *blk;
- int level;
-
- trace_xfs_attr_fillstate(state->args);
-
- /*
- * Roll down the "path" in the state structure, storing the on-disk
- * block number for those buffers in the "path".
- */
- path = &state->path;
- ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
- for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
- if (blk->bp) {
- blk->disk_blkno = xfs_buf_daddr(blk->bp);
- blk->bp = NULL;
- } else {
- blk->disk_blkno = 0;
- }
- }
-
- /*
- * Roll down the "altpath" in the state structure, storing the on-disk
- * block number for those buffers in the "altpath".
- */
- path = &state->altpath;
- ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
- for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
- if (blk->bp) {
- blk->disk_blkno = xfs_buf_daddr(blk->bp);
- blk->bp = NULL;
- } else {
- blk->disk_blkno = 0;
- }
- }
-
- return 0;
-}
-
-/*
- * Reattach the buffers to the state structure based on the disk block
- * numbers stored in the state structure.
- * This is done after some set of transaction commits have released those
- * buffers from our grip.
- */
-STATIC int
-xfs_attr_refillstate(xfs_da_state_t *state)
-{
- xfs_da_state_path_t *path;
- xfs_da_state_blk_t *blk;
- int level, error;
-
- trace_xfs_attr_refillstate(state->args);
-
- /*
- * Roll down the "path" in the state structure, storing the on-disk
- * block number for those buffers in the "path".
- */
- path = &state->path;
- ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
- for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
- if (blk->disk_blkno) {
- error = xfs_da3_node_read_mapped(state->args->trans,
- state->args->dp, blk->disk_blkno,
- &blk->bp, XFS_ATTR_FORK);
- if (error)
- return error;
- } else {
- blk->bp = NULL;
- }
- }
-
- /*
- * Roll down the "altpath" in the state structure, storing the on-disk
- * block number for those buffers in the "altpath".
- */
- path = &state->altpath;
- ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
- for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
- if (blk->disk_blkno) {
- error = xfs_da3_node_read_mapped(state->args->trans,
- state->args->dp, blk->disk_blkno,
- &blk->bp, XFS_ATTR_FORK);
- if (error)
- return error;
- } else {
- blk->bp = NULL;
- }
- }
-
- return 0;
-}
-
-/*
* Retrieve the attribute data from a node attribute list.
*
* This routine gets called for any attribute fork that has more than one
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 5e71f719bdd5..1af7abe29eef 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -28,6 +28,15 @@ struct xfs_attr_list_context;
*/
#define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */
+static inline bool xfs_has_larp(struct xfs_mount *mp)
+{
+#ifdef DEBUG
+ return xfs_globals.larp;
+#else
+ return false;
+#endif
+}
+
/*
* Kernel-internal version of the attrlist cursor.
*/
@@ -425,7 +434,7 @@ struct xfs_attr_list_context {
*/
/*
- * Enum values for xfs_delattr_context.da_state
+ * Enum values for xfs_attr_item.xattri_dela_state
*
* These values are used by delayed attribute operations to keep track of where
* they were before they returned -EAGAIN. A return code of -EAGAIN signals the
@@ -434,46 +443,105 @@ struct xfs_attr_list_context {
* to where it was and resume executing where it left off.
*/
enum xfs_delattr_state {
- XFS_DAS_UNINIT = 0, /* No state has been set yet */
- XFS_DAS_RMTBLK, /* Removing remote blks */
- XFS_DAS_RM_NAME, /* Remove attr name */
- XFS_DAS_RM_SHRINK, /* We are shrinking the tree */
- XFS_DAS_FOUND_LBLK, /* We found leaf blk for attr */
- XFS_DAS_FOUND_NBLK, /* We found node blk for attr */
- XFS_DAS_FLIP_LFLAG, /* Flipped leaf INCOMPLETE attr flag */
- XFS_DAS_RM_LBLK, /* A rename is removing leaf blocks */
- XFS_DAS_RD_LEAF, /* Read in the new leaf */
- XFS_DAS_ALLOC_NODE, /* We are allocating node blocks */
- XFS_DAS_FLIP_NFLAG, /* Flipped node INCOMPLETE attr flag */
- XFS_DAS_RM_NBLK, /* A rename is removing node blocks */
- XFS_DAS_CLR_FLAG, /* Clear incomplete flag */
+ XFS_DAS_UNINIT = 0, /* No state has been set yet */
+
+ /*
+ * Initial sequence states. The replace setup code relies on the
+ * ADD and REMOVE states for a specific format to be sequential so
+ * that we can transform the initial operation to be performed
+ * according to the xfs_has_larp() state easily.
+ */
+ XFS_DAS_SF_ADD, /* Initial sf add state */
+ XFS_DAS_SF_REMOVE, /* Initial sf replace/remove state */
+
+ XFS_DAS_LEAF_ADD, /* Initial leaf add state */
+ XFS_DAS_LEAF_REMOVE, /* Initial leaf replace/remove state */
+
+ XFS_DAS_NODE_ADD, /* Initial node add state */
+ XFS_DAS_NODE_REMOVE, /* Initial node replace/remove state */
+
+ /* Leaf state set/replace/remove sequence */
+ XFS_DAS_LEAF_SET_RMT, /* set a remote xattr from a leaf */
+ XFS_DAS_LEAF_ALLOC_RMT, /* We are allocating remote blocks */
+ XFS_DAS_LEAF_REPLACE, /* Perform replace ops on a leaf */
+ XFS_DAS_LEAF_REMOVE_OLD, /* Start removing old attr from leaf */
+ XFS_DAS_LEAF_REMOVE_RMT, /* A rename is removing remote blocks */
+ XFS_DAS_LEAF_REMOVE_ATTR, /* Remove the old attr from a leaf */
+
+ /* Node state sequence, must match leaf state above */
+ XFS_DAS_NODE_SET_RMT, /* set a remote xattr from a node */
+ XFS_DAS_NODE_ALLOC_RMT, /* We are allocating remote blocks */
+ XFS_DAS_NODE_REPLACE, /* Perform replace ops on a node */
+ XFS_DAS_NODE_REMOVE_OLD, /* Start removing old attr from node */
+ XFS_DAS_NODE_REMOVE_RMT, /* A rename is removing remote blocks */
+ XFS_DAS_NODE_REMOVE_ATTR, /* Remove the old attr from a node */
+
+ XFS_DAS_DONE, /* finished operation */
};
+#define XFS_DAS_STRINGS \
+ { XFS_DAS_UNINIT, "XFS_DAS_UNINIT" }, \
+ { XFS_DAS_SF_ADD, "XFS_DAS_SF_ADD" }, \
+ { XFS_DAS_SF_REMOVE, "XFS_DAS_SF_REMOVE" }, \
+ { XFS_DAS_LEAF_ADD, "XFS_DAS_LEAF_ADD" }, \
+ { XFS_DAS_LEAF_REMOVE, "XFS_DAS_LEAF_REMOVE" }, \
+ { XFS_DAS_NODE_ADD, "XFS_DAS_NODE_ADD" }, \
+ { XFS_DAS_NODE_REMOVE, "XFS_DAS_NODE_REMOVE" }, \
+ { XFS_DAS_LEAF_SET_RMT, "XFS_DAS_LEAF_SET_RMT" }, \
+ { XFS_DAS_LEAF_ALLOC_RMT, "XFS_DAS_LEAF_ALLOC_RMT" }, \
+ { XFS_DAS_LEAF_REPLACE, "XFS_DAS_LEAF_REPLACE" }, \
+ { XFS_DAS_LEAF_REMOVE_OLD, "XFS_DAS_LEAF_REMOVE_OLD" }, \
+ { XFS_DAS_LEAF_REMOVE_RMT, "XFS_DAS_LEAF_REMOVE_RMT" }, \
+ { XFS_DAS_LEAF_REMOVE_ATTR, "XFS_DAS_LEAF_REMOVE_ATTR" }, \
+ { XFS_DAS_NODE_SET_RMT, "XFS_DAS_NODE_SET_RMT" }, \
+ { XFS_DAS_NODE_ALLOC_RMT, "XFS_DAS_NODE_ALLOC_RMT" }, \
+ { XFS_DAS_NODE_REPLACE, "XFS_DAS_NODE_REPLACE" }, \
+ { XFS_DAS_NODE_REMOVE_OLD, "XFS_DAS_NODE_REMOVE_OLD" }, \
+ { XFS_DAS_NODE_REMOVE_RMT, "XFS_DAS_NODE_REMOVE_RMT" }, \
+ { XFS_DAS_NODE_REMOVE_ATTR, "XFS_DAS_NODE_REMOVE_ATTR" }, \
+ { XFS_DAS_DONE, "XFS_DAS_DONE" }
+
/*
- * Defines for xfs_delattr_context.flags
+ * Defines for xfs_attr_item.xattri_flags
*/
-#define XFS_DAC_DEFER_FINISH 0x01 /* finish the transaction */
-#define XFS_DAC_LEAF_ADDNAME_INIT 0x02 /* xfs_attr_leaf_addname init*/
+#define XFS_DAC_LEAF_ADDNAME_INIT 0x01 /* xfs_attr_leaf_addname init*/
/*
* Context used for keeping track of delayed attribute operations
*/
-struct xfs_delattr_context {
- struct xfs_da_args *da_args;
+struct xfs_attr_item {
+ struct xfs_da_args *xattri_da_args;
+
+ /*
+ * Used by xfs_attr_set to hold a leaf buffer across a transaction roll
+ */
+ struct xfs_buf *xattri_leaf_bp;
/* Used in xfs_attr_rmtval_set_blk to roll through allocating blocks */
- struct xfs_bmbt_irec map;
- xfs_dablk_t lblkno;
- int blkcnt;
+ struct xfs_bmbt_irec xattri_map;
+ xfs_dablk_t xattri_lblkno;
+ int xattri_blkcnt;
/* Used in xfs_attr_node_removename to roll through removing blocks */
- struct xfs_da_state *da_state;
+ struct xfs_da_state *xattri_da_state;
/* Used to keep track of current state of delayed operation */
- unsigned int flags;
- enum xfs_delattr_state dela_state;
+ unsigned int xattri_flags;
+ enum xfs_delattr_state xattri_dela_state;
+
+ /*
+ * Attr operation being performed - XFS_ATTR_OP_FLAGS_*
+ */
+ unsigned int xattri_op_flags;
+
+ /*
+ * used to log this item to an intent containing a list of attrs to
+ * commit later
+ */
+ struct list_head xattri_list;
};
+
/*========================================================================
* Function prototypes for the kernel.
*========================================================================*/
@@ -489,11 +557,81 @@ bool xfs_attr_is_leaf(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_da_args *args);
int xfs_attr_get(struct xfs_da_args *args);
int xfs_attr_set(struct xfs_da_args *args);
-int xfs_attr_set_args(struct xfs_da_args *args);
-int xfs_attr_remove_args(struct xfs_da_args *args);
-int xfs_attr_remove_iter(struct xfs_delattr_context *dac);
+int xfs_attr_set_iter(struct xfs_attr_item *attr);
+int xfs_attr_remove_iter(struct xfs_attr_item *attr);
bool xfs_attr_namecheck(const void *name, size_t length);
-void xfs_delattr_context_init(struct xfs_delattr_context *dac,
- struct xfs_da_args *args);
+int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
+void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
+ unsigned int *total);
+
+extern struct kmem_cache *xfs_attri_cache;
+extern struct kmem_cache *xfs_attrd_cache;
+
+int __init xfs_attri_init_cache(void);
+void xfs_attri_destroy_cache(void);
+int __init xfs_attrd_init_cache(void);
+void xfs_attrd_destroy_cache(void);
+
+/*
+ * Return true if the attr fork is in local (shortform) format, or is in
+ * extents format but currently has no extents.
+ */
+static inline bool
+xfs_attr_is_shortform(
+ struct xfs_inode *ip)
+{
+ return ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL ||
+ (ip->i_afp->if_format == XFS_DINODE_FMT_EXTENTS &&
+ ip->i_afp->if_nextents == 0);
+}
+
+static inline enum xfs_delattr_state
+xfs_attr_init_add_state(struct xfs_da_args *args)
+{
+ /*
+ * When called from the completion of an attr remove to determine the
+ * next state, the attribute fork may be null. This can only occur
+ * on a pure remove, but we grab the next state before we check if a
+ * replace operation is being performed. If we are called from any other
+ * context, i_afp is guaranteed to exist. Hence if the attr fork is
+ * null, we were called from a pure remove operation and so we are done.
+ */
+ if (!args->dp->i_afp)
+ return XFS_DAS_DONE;
+
+ args->op_flags |= XFS_DA_OP_ADDNAME;
+ if (xfs_attr_is_shortform(args->dp))
+ return XFS_DAS_SF_ADD;
+ if (xfs_attr_is_leaf(args->dp))
+ return XFS_DAS_LEAF_ADD;
+ return XFS_DAS_NODE_ADD;
+}
+
+static inline enum xfs_delattr_state
+xfs_attr_init_remove_state(struct xfs_da_args *args)
+{
+ args->op_flags |= XFS_DA_OP_REMOVE;
+ if (xfs_attr_is_shortform(args->dp))
+ return XFS_DAS_SF_REMOVE;
+ if (xfs_attr_is_leaf(args->dp))
+ return XFS_DAS_LEAF_REMOVE;
+ return XFS_DAS_NODE_REMOVE;
+}
+
+/*
+ * If we are logging the attributes, then we have to start with removal of the
+ * old attribute so that there is always consistent state that we can recover
+ * from if the system goes down part way through. We always log the new attr
+ * value, so even when we remove the attr first we still have the information in
+ * the log to finish the replace operation atomically.
+ */
+static inline enum xfs_delattr_state
+xfs_attr_init_replace_state(struct xfs_da_args *args)
+{
+ args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE;
+ if (xfs_has_larp(args->dp->i_mount))
+ return xfs_attr_init_remove_state(args);
+ return xfs_attr_init_add_state(args);
+}
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 014daa8c542d..15a990409463 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -28,6 +28,7 @@
#include "xfs_dir2.h"
#include "xfs_log.h"
#include "xfs_ag.h"
+#include "xfs_errortag.h"
/*
@@ -310,6 +311,15 @@ xfs_attr3_leaf_verify(
return fa;
/*
+ * Empty leaf blocks should never occur; they imply the existence of a
+ * software bug that needs fixing. xfs_repair also flags them as a
+ * corruption that needs fixing, so we should never let these go to
+ * disk.
+ */
+ if (ichdr.count == 0)
+ return __this_address;
+
+ /*
* firstused is the block offset of the first name info structure.
* Make sure it doesn't go off the block or crash into the header.
*/
@@ -445,6 +455,14 @@ xfs_attr3_leaf_read(
* Namespace helper routines
*========================================================================*/
+/*
+ * If we are in log recovery, then we want the lookup to ignore the INCOMPLETE
+ * flag on disk - if there's an incomplete attr then recovery needs to tear it
+ * down. If there's no incomplete attr, then recovery needs to tear that attr
+ * down to replace it with the attr that has been logged. In this case, the
+ * INCOMPLETE flag will not be set in attr->attr_filter, but rather
+ * XFS_DA_OP_RECOVERY will be set in args->op_flags.
+ */
static bool
xfs_attr_match(
struct xfs_da_args *args,
@@ -452,14 +470,18 @@ xfs_attr_match(
unsigned char *name,
int flags)
{
+
if (args->namelen != namelen)
return false;
if (memcmp(args->name, name, namelen) != 0)
return false;
- /*
- * If we are looking for incomplete entries, show only those, else only
- * show complete entries.
- */
+
+ /* Recovery ignores the INCOMPLETE flag. */
+ if ((args->op_flags & XFS_DA_OP_RECOVERY) &&
+ args->attr_filter == (flags & XFS_ATTR_NSP_ONDISK_MASK))
+ return true;
+
+ /* All remaining matches need to be filtered by INCOMPLETE state. */
if (args->attr_filter !=
(flags & (XFS_ATTR_NSP_ONDISK_MASK | XFS_ATTR_INCOMPLETE)))
return false;
@@ -798,6 +820,14 @@ xfs_attr_sf_removename(
sf = (struct xfs_attr_shortform *)dp->i_afp->if_u1.if_data;
error = xfs_attr_sf_findname(args, &sfe, &base);
+
+ /*
+ * If we are recovering an operation, finding nothing to
+ * remove is not an error - it just means there was nothing
+ * to clean up.
+ */
+ if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY))
+ return 0;
if (error != -EEXIST)
return error;
size = xfs_attr_sf_entsize(sfe);
@@ -818,7 +848,7 @@ xfs_attr_sf_removename(
totsize -= size;
if (totsize == sizeof(xfs_attr_sf_hdr_t) && xfs_has_attr2(mp) &&
(dp->i_df.if_format != XFS_DINODE_FMT_BTREE) &&
- !(args->op_flags & XFS_DA_OP_ADDNAME)) {
+ !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) {
xfs_attr_fork_remove(dp, args->trans);
} else {
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
@@ -1127,9 +1157,17 @@ xfs_attr3_leaf_to_shortform(
goto out;
if (forkoff == -1) {
- ASSERT(xfs_has_attr2(dp->i_mount));
- ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
- xfs_attr_fork_remove(dp, args->trans);
+ /*
+ * Don't remove the attr fork if this operation is the first
+ * part of an attr replace operation. We're going to add a new
+ * attr immediately, so we need to keep the attr fork around in
+ * this case.
+ */
+ if (!(args->op_flags & XFS_DA_OP_REPLACE)) {
+ ASSERT(xfs_has_attr2(dp->i_mount));
+ ASSERT(dp->i_df.if_format != XFS_DINODE_FMT_BTREE);
+ xfs_attr_fork_remove(dp, args->trans);
+ }
goto out;
}
@@ -1189,6 +1227,11 @@ xfs_attr3_leaf_to_node(
trace_xfs_attr_leaf_to_node(args);
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_ATTR_LEAF_TO_NODE)) {
+ error = -EIO;
+ goto out;
+ }
+
error = xfs_da_grow_inode(args, &blkno);
if (error)
goto out;
@@ -1486,8 +1529,9 @@ xfs_attr3_leaf_add_work(
entry->flags = args->attr_filter;
if (tmp)
entry->flags |= XFS_ATTR_LOCAL;
- if (args->op_flags & XFS_DA_OP_RENAME) {
- entry->flags |= XFS_ATTR_INCOMPLETE;
+ if (args->op_flags & XFS_DA_OP_REPLACE) {
+ if (!xfs_has_larp(mp))
+ entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
(args->index2 <= args->index)) {
args->index2++;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 83b95be9ded8..4250159ecced 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -568,14 +568,14 @@ xfs_attr_rmtval_stale(
*/
int
xfs_attr_rmtval_find_space(
- struct xfs_delattr_context *dac)
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
- struct xfs_bmbt_irec *map = &dac->map;
+ struct xfs_da_args *args = attr->xattri_da_args;
+ struct xfs_bmbt_irec *map = &attr->xattri_map;
int error;
- dac->lblkno = 0;
- dac->blkcnt = 0;
+ attr->xattri_lblkno = 0;
+ attr->xattri_blkcnt = 0;
args->rmtblkcnt = 0;
args->rmtblkno = 0;
memset(map, 0, sizeof(struct xfs_bmbt_irec));
@@ -584,8 +584,8 @@ xfs_attr_rmtval_find_space(
if (error)
return error;
- dac->blkcnt = args->rmtblkcnt;
- dac->lblkno = args->rmtblkno;
+ attr->xattri_blkcnt = args->rmtblkcnt;
+ attr->xattri_lblkno = args->rmtblkno;
return 0;
}
@@ -598,17 +598,18 @@ xfs_attr_rmtval_find_space(
*/
int
xfs_attr_rmtval_set_blk(
- struct xfs_delattr_context *dac)
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
+ struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_inode *dp = args->dp;
- struct xfs_bmbt_irec *map = &dac->map;
+ struct xfs_bmbt_irec *map = &attr->xattri_map;
int nmap;
int error;
nmap = 1;
- error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)dac->lblkno,
- dac->blkcnt, XFS_BMAPI_ATTRFORK, args->total,
+ error = xfs_bmapi_write(args->trans, dp,
+ (xfs_fileoff_t)attr->xattri_lblkno,
+ attr->xattri_blkcnt, XFS_BMAPI_ATTRFORK, args->total,
map, &nmap);
if (error)
return error;
@@ -618,8 +619,8 @@ xfs_attr_rmtval_set_blk(
(map->br_startblock != HOLESTARTBLOCK));
/* roll attribute extent map forwards */
- dac->lblkno += map->br_blockcount;
- dac->blkcnt -= map->br_blockcount;
+ attr->xattri_lblkno += map->br_blockcount;
+ attr->xattri_blkcnt -= map->br_blockcount;
return 0;
}
@@ -673,9 +674,9 @@ xfs_attr_rmtval_invalidate(
*/
int
xfs_attr_rmtval_remove(
- struct xfs_delattr_context *dac)
+ struct xfs_attr_item *attr)
{
- struct xfs_da_args *args = dac->da_args;
+ struct xfs_da_args *args = attr->xattri_da_args;
int error, done;
/*
@@ -695,8 +696,8 @@ xfs_attr_rmtval_remove(
* the parent
*/
if (!done) {
- dac->flags |= XFS_DAC_DEFER_FINISH;
- trace_xfs_attr_rmtval_remove_return(dac->dela_state, args->dp);
+ trace_xfs_attr_rmtval_remove_return(attr->xattri_dela_state,
+ args->dp);
return -EAGAIN;
}
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index d72eff30ca18..62b398edec3f 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -12,9 +12,9 @@ int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
xfs_buf_flags_t incore_flags);
int xfs_attr_rmtval_invalidate(struct xfs_da_args *args);
-int xfs_attr_rmtval_remove(struct xfs_delattr_context *dac);
+int xfs_attr_rmtval_remove(struct xfs_attr_item *attr);
int xfs_attr_rmt_find_hole(struct xfs_da_args *args);
int xfs_attr_rmtval_set_value(struct xfs_da_args *args);
-int xfs_attr_rmtval_set_blk(struct xfs_delattr_context *dac);
-int xfs_attr_rmtval_find_space(struct xfs_delattr_context *dac);
+int xfs_attr_rmtval_set_blk(struct xfs_attr_item *attr);
+int xfs_attr_rmtval_find_space(struct xfs_attr_item *attr);
#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9dc1ecb9713d..aa74f3fdb571 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -22,6 +22,7 @@
#include "xfs_trace.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
+#include "xfs_errortag.h"
/*
* xfs_da_btree.c
@@ -482,6 +483,9 @@ xfs_da3_split(
trace_xfs_da_split(state->args);
+ if (XFS_TEST_ERROR(false, state->mp, XFS_ERRTAG_DA_LEAF_SPLIT))
+ return -EIO;
+
/*
* Walk back up the tree splitting/inserting/adjusting as necessary.
* If we need to insert and there isn't room, split the node, then
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index deb368d041e3..ed2303e4d46a 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -85,19 +85,23 @@ typedef struct xfs_da_args {
* Operation flags:
*/
#define XFS_DA_OP_JUSTCHECK (1u << 0) /* check for ok with no space */
-#define XFS_DA_OP_RENAME (1u << 1) /* this is an atomic rename op */
+#define XFS_DA_OP_REPLACE (1u << 1) /* this is an atomic replace op */
#define XFS_DA_OP_ADDNAME (1u << 2) /* this is an add operation */
#define XFS_DA_OP_OKNOENT (1u << 3) /* lookup op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP (1u << 4) /* lookup returns CI name if found */
#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */
+#define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */
+#define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
- { XFS_DA_OP_RENAME, "RENAME" }, \
+ { XFS_DA_OP_REPLACE, "REPLACE" }, \
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
- { XFS_DA_OP_NOTIME, "NOTIME" }
+ { XFS_DA_OP_NOTIME, "NOTIME" }, \
+ { XFS_DA_OP_REMOVE, "REMOVE" }, \
+ { XFS_DA_OP_RECOVERY, "RECOVERY" }
/*
* Storage for holding state during Btree searches and split/join ops.
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1aa32bfdf0cc..ceb222b4f261 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -22,6 +22,10 @@
#include "xfs_refcount.h"
#include "xfs_bmap.h"
#include "xfs_alloc.h"
+#include "xfs_buf.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
static struct kmem_cache *xfs_defer_pending_cache;
@@ -184,6 +188,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type,
[XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
+ [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
};
static bool
@@ -780,17 +785,25 @@ xfs_defer_ops_continue(
struct xfs_trans *tp,
struct xfs_defer_resources *dres)
{
+ unsigned int i;
+
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
- /* Lock and join the captured inode to the new transaction. */
+ /* Lock the captured resources to the new transaction. */
if (dfc->dfc_held.dr_inos == 2)
xfs_lock_two_inodes(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL,
dfc->dfc_held.dr_ip[1], XFS_ILOCK_EXCL);
else if (dfc->dfc_held.dr_inos == 1)
xfs_ilock(dfc->dfc_held.dr_ip[0], XFS_ILOCK_EXCL);
+
+ for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
+ xfs_buf_lock(dfc->dfc_held.dr_bp[i]);
+
+ /* Join the captured resources to the new transaction. */
xfs_defer_restore_resources(tp, &dfc->dfc_held);
memcpy(dres, &dfc->dfc_held, sizeof(struct xfs_defer_resources));
+ dres->dr_bufs = 0;
/* Move captured dfops chain and state to the transaction. */
list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
@@ -860,7 +873,12 @@ xfs_defer_init_item_caches(void)
error = xfs_extfree_intent_init_cache();
if (error)
goto err;
-
+ error = xfs_attri_init_cache();
+ if (error)
+ goto err;
+ error = xfs_attrd_init_cache();
+ if (error)
+ goto err;
return 0;
err:
xfs_defer_destroy_item_caches();
@@ -871,6 +889,8 @@ err:
void
xfs_defer_destroy_item_caches(void)
{
+ xfs_attri_destroy_cache();
+ xfs_attrd_destroy_cache();
xfs_extfree_intent_destroy_cache();
xfs_bmap_intent_destroy_cache();
xfs_refcount_intent_destroy_cache();
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 7bb8a31ad65b..114a3a4930a3 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -19,6 +19,7 @@ enum xfs_defer_ops_type {
XFS_DEFER_OPS_TYPE_RMAP,
XFS_DEFER_OPS_TYPE_FREE,
XFS_DEFER_OPS_TYPE_AGFL_FREE,
+ XFS_DEFER_OPS_TYPE_ATTR,
XFS_DEFER_OPS_TYPE_MAX,
};
@@ -63,6 +64,8 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+extern const struct xfs_defer_op_type xfs_attr_defer_type;
+
/*
* Deferred operation item relogging limits.
diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index a23a52e643ad..5362908164b0 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -59,7 +59,10 @@
#define XFS_ERRTAG_REDUCE_MAX_IEXTENTS 36
#define XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT 37
#define XFS_ERRTAG_AG_RESV_FAIL 38
-#define XFS_ERRTAG_MAX 39
+#define XFS_ERRTAG_LARP 39
+#define XFS_ERRTAG_DA_LEAF_SPLIT 40
+#define XFS_ERRTAG_ATTR_LEAF_TO_NODE 41
+#define XFS_ERRTAG_MAX 42
/*
* Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -103,5 +106,8 @@
#define XFS_RANDOM_REDUCE_MAX_IEXTENTS 1
#define XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT 1
#define XFS_RANDOM_AG_RESV_FAIL 1
+#define XFS_RANDOM_LARP 1
+#define XFS_RANDOM_DA_LEAF_SPLIT 1
+#define XFS_RANDOM_ATTR_LEAF_TO_NODE 1
#endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 96fd49fbc9fa..afdfc8108c5f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -390,7 +390,9 @@ xfs_sb_has_incompat_feature(
return (sbp->sb_features_incompat & feature) != 0;
}
-#define XFS_SB_FEAT_INCOMPAT_LOG_ALL 0
+#define XFS_SB_FEAT_INCOMPAT_LOG_XATTRS (1 << 0) /* Delayed Attributes */
+#define XFS_SB_FEAT_INCOMPAT_LOG_ALL \
+ (XFS_SB_FEAT_INCOMPAT_LOG_XATTRS)
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
static inline bool
xfs_sb_has_incompat_log_feature(
@@ -415,6 +417,11 @@ xfs_sb_add_incompat_log_features(
sbp->sb_features_log_incompat |= features;
}
+/*
+ * Report whether the logged extended attribute log-incompat feature bit is set
+ * in this superblock. Only v5 superblocks can have it.
+ */
+static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
+{
+	if (!xfs_sb_is_v5(sbp))
+		return false;
+
+	return (sbp->sb_features_log_incompat &
+		XFS_SB_FEAT_INCOMPAT_LOG_XATTRS) != 0;
+}
static inline bool
xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index afce51633f03..f7edd1ecf6d9 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -113,7 +113,12 @@ struct xfs_unmount_log_format {
#define XLOG_REG_TYPE_CUD_FORMAT 24
#define XLOG_REG_TYPE_BUI_FORMAT 25
#define XLOG_REG_TYPE_BUD_FORMAT 26
-#define XLOG_REG_TYPE_MAX 26
+#define XLOG_REG_TYPE_ATTRI_FORMAT 27
+#define XLOG_REG_TYPE_ATTRD_FORMAT 28
+#define XLOG_REG_TYPE_ATTR_NAME 29
+#define XLOG_REG_TYPE_ATTR_VALUE 30
+#define XLOG_REG_TYPE_MAX 30
+
/*
* Flags to log operation header
@@ -236,6 +241,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_CUD 0x1243
#define XFS_LI_BUI 0x1244 /* bmbt update intent */
#define XFS_LI_BUD 0x1245
+#define XFS_LI_ATTRI 0x1246 /* attr set/remove intent*/
+#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -251,7 +258,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_CUI, "XFS_LI_CUI" }, \
{ XFS_LI_CUD, "XFS_LI_CUD" }, \
{ XFS_LI_BUI, "XFS_LI_BUI" }, \
- { XFS_LI_BUD, "XFS_LI_BUD" }
+ { XFS_LI_BUD, "XFS_LI_BUD" }, \
+ { XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \
+ { XFS_LI_ATTRD, "XFS_LI_ATTRD" }
/*
* Inode Log Item Format definitions.
@@ -893,4 +902,36 @@ struct xfs_icreate_log {
__be32 icl_gen; /* inode generation number to use */
};
+/*
+ * Flags for deferred attribute operations.
+ * Upper bits are flags, lower byte is type code
+ */
+#define XFS_ATTR_OP_FLAGS_SET 1 /* Set the attribute */
+#define XFS_ATTR_OP_FLAGS_REMOVE 2 /* Remove the attribute */
+#define XFS_ATTR_OP_FLAGS_REPLACE 3 /* Replace the attribute */
+#define XFS_ATTR_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */
+
+/*
+ * This is the structure used to lay out an attr intent (ATTRI) log item in
+ * the log. It is logged in its own region; the attr name, and the value if
+ * there is one, are logged as separate regions after it.
+ */
+struct xfs_attri_log_format {
+ uint16_t alfi_type; /* attri log item type */
+ uint16_t alfi_size; /* size of this item, in log regions */
+ uint32_t __pad; /* pad to 64 bit aligned, must be zero */
+ uint64_t alfi_id; /* attri identifier */
+ uint64_t alfi_ino; /* the inode for this attr operation */
+ uint32_t alfi_op_flags; /* marks the op as a set, remove or replace */
+ uint32_t alfi_name_len; /* attr name length */
+ uint32_t alfi_value_len; /* attr value length */
+ uint32_t alfi_attr_flags;/* attr flags */
+};
+
+/*
+ * This is the structure used to lay out an attr done (ATTRD) log item in the
+ * log. It refers back to the intent it completes through alfd_alf_id.
+ */
+struct xfs_attrd_log_format {
+ uint16_t alfd_type; /* attrd log item type */
+ uint16_t alfd_size; /* size of this item */
+ uint32_t __pad; /* pad to 64 bit aligned */
+ uint64_t alfd_alf_id; /* id of corresponding attri */
+};
+
#endif /* __XFS_LOG_FORMAT_H__ */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index ff69a0000817..32e216255cb0 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -72,6 +72,8 @@ extern const struct xlog_recover_item_ops xlog_rui_item_ops;
extern const struct xlog_recover_item_ops xlog_rud_item_ops;
extern const struct xlog_recover_item_ops xlog_cui_item_ops;
extern const struct xlog_recover_item_ops xlog_cud_item_ops;
+extern const struct xlog_recover_item_ops xlog_attri_item_ops;
+extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index bf1f3607d0b6..97b54ac3075f 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -23,6 +23,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 5c52ee869272..3df9c1782ead 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -10,12 +10,12 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#include "xfs_acl.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include "xfs_trans.h"
#include <linux/posix_acl_xattr.h>
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
new file mode 100644
index 000000000000..e8ac88d9fd14
--- /dev/null
+++ b/fs/xfs/xfs_attr_item.c
@@ -0,0 +1,824 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2022 Oracle. All Rights Reserved.
+ * Author: Allison Henderson <allison.henderson@oracle.com>
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_shared.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_priv.h"
+#include "xfs_log.h"
+#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_attr.h"
+#include "xfs_attr_item.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_trans_space.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+static const struct xfs_item_ops xfs_attri_item_ops;
+static const struct xfs_item_ops xfs_attrd_item_ops;
+static struct xfs_attrd_log_item *xfs_trans_get_attrd(struct xfs_trans *tp,
+ struct xfs_attri_log_item *attrip);
+
+/* Convert a generic log item pointer into the attri log item that embeds it. */
+static inline struct xfs_attri_log_item *ATTRI_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_attri_log_item, attri_item);
+}
+
+/*
+ * Free an attri log item: first the li_lv_shadow buffer hanging off its log
+ * item, then the item itself.
+ */
+STATIC void
+xfs_attri_item_free(
+ struct xfs_attri_log_item *attrip)
+{
+ kmem_free(attrip->attri_item.li_lv_shadow);
+ kvfree(attrip);
+}
+
+/*
+ * Drop one reference to the ATTRI; the caller that drops the last one removes
+ * the item from the AIL and frees it.
+ *
+ * The reference count is needed because the ATTRI may not have been inserted
+ * into the AIL yet when ATTRD processing releases it, due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Counting references
+ * ensures that only the final caller tears the item down.
+ */
+STATIC void
+xfs_attri_release(
+	struct xfs_attri_log_item	*attrip)
+{
+	ASSERT(atomic_read(&attrip->attri_refcount) > 0);
+
+	if (atomic_dec_and_test(&attrip->attri_refcount)) {
+		xfs_trans_ail_delete(&attrip->attri_item, 0);
+		xfs_attri_item_free(attrip);
+	}
+}
+
+/*
+ * Add the space needed to log this attri item to the caller's running totals:
+ * one iovec each for the format structure and the name, plus one more for the
+ * value when the item has one.
+ */
+STATIC void
+xfs_attri_item_size(
+	struct xfs_log_item		*lip,
+	int				*nvecs,
+	int				*nbytes)
+{
+	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
+
+	/* The format structure and the name are always logged. */
+	*nvecs += 2;
+	*nbytes += sizeof(struct xfs_attri_log_format) +
+			xlog_calc_iovec_len(attrip->attri_name_len);
+
+	/* The value is only logged when there is one. */
+	if (attrip->attri_value_len) {
+		*nvecs += 1;
+		*nbytes += xlog_calc_iovec_len(attrip->attri_value_len);
+	}
+}
+
+/*
+ * Fill in the log iovecs for the given attri log item: one iovec for the
+ * format structure, one for the name, and a third for the value when the item
+ * carries one.
+ */
+STATIC void
+xfs_attri_item_format(
+	struct xfs_log_item		*lip,
+	struct xfs_log_vec		*lv)
+{
+	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
+	struct xfs_log_iovec		*vecp = NULL;
+	bool				has_value = attrip->attri_value_len > 0;
+
+	ASSERT(attrip->attri_name_len > 0);
+
+	/*
+	 * The region count must be final before the format structure is copied
+	 * into its iovec. If it were updated afterwards, the wrong size would
+	 * be recorded in the log and log recovery would later trip over
+	 * assertion checks for bad region sizes.
+	 */
+	attrip->attri_format.alfi_type = XFS_LI_ATTRI;
+	attrip->attri_format.alfi_size = 2;	/* format + name */
+	if (has_value)
+		attrip->attri_format.alfi_size++;
+
+	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT,
+			&attrip->attri_format,
+			sizeof(struct xfs_attri_log_format));
+	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_NAME,
+			attrip->attri_name,
+			attrip->attri_name_len);
+	if (has_value)
+		xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTR_VALUE,
+				attrip->attri_value,
+				attrip->attri_value_len);
+}
+
+/*
+ * The unpin operation is the last place an ATTRI is manipulated in the log. It
+ * is either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the ATTRI transaction has been successfully committed to make
+ * it this far. Therefore, we expect whoever committed the ATTRI to either
+ * construct and commit the ATTRD or drop the ATTRD's reference in the event of
+ * error. Simply drop the log's ATTRI reference now that the log is done with
+ * it.
+ *
+ * The @remove argument is not used here; the reference is dropped either way.
+ */
+STATIC void
+xfs_attri_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
+{
+ xfs_attri_release(ATTRI_ITEM(lip));
+}
+
+
+/* Drop one reference to the ATTRI that embeds this log item. */
+STATIC void
+xfs_attri_item_release(
+ struct xfs_log_item *lip)
+{
+ xfs_attri_release(ATTRI_ITEM(lip));
+}
+
+/*
+ * Allocate and initialize an attri item.
+ *
+ * When @name_len + @value_len is non-zero, the item is allocated with that
+ * many trailing bytes and attri_name/attri_value are pointed into them: the
+ * name directly after the structure, the value directly after the name.
+ * Otherwise the item comes from xfs_attri_cache and both pointers are NULL.
+ *
+ * Only the structure itself is zeroed; the trailing buffer is left for the
+ * caller to fill in. The item is returned holding two references.
+ */
+STATIC struct xfs_attri_log_item *
+xfs_attri_init(
+ struct xfs_mount *mp,
+ uint32_t name_len,
+ uint32_t value_len)
+
+{
+ struct xfs_attri_log_item *attrip;
+ uint32_t buffer_size = name_len + value_len;
+
+ if (buffer_size) {
+ /*
+ * This could be over 64kB in length, so we have to use
+ * kvmalloc() for this. But kvmalloc() utterly sucks, so we
+ * use our own version.
+ */
+ attrip = xlog_kvmalloc(sizeof(struct xfs_attri_log_item) +
+ buffer_size);
+ } else {
+ attrip = kmem_cache_alloc(xfs_attri_cache,
+ GFP_NOFS | __GFP_NOFAIL);
+ }
+ memset(attrip, 0, sizeof(struct xfs_attri_log_item));
+
+ /* The name, if any, lives immediately after the structure. */
+ attrip->attri_name_len = name_len;
+ if (name_len)
+ attrip->attri_name = ((char *)attrip) +
+ sizeof(struct xfs_attri_log_item);
+ else
+ attrip->attri_name = NULL;
+
+ /* The value, if any, lives immediately after the name. */
+ attrip->attri_value_len = value_len;
+ if (value_len)
+ attrip->attri_value = ((char *)attrip) +
+ sizeof(struct xfs_attri_log_item) +
+ name_len;
+ else
+ attrip->attri_value = NULL;
+
+ xfs_log_item_init(mp, &attrip->attri_item, XFS_LI_ATTRI,
+ &xfs_attri_item_ops);
+ /* The in-memory address of the item doubles as its intent id. */
+ attrip->attri_format.alfi_id = (uintptr_t)(void *)attrip;
+ /*
+ * NOTE(review): presumably one reference for the log (dropped at unpin)
+ * and one for the ATTRD or the committer - confirm against the callers.
+ */
+ atomic_set(&attrip->attri_refcount, 2);
+
+ return attrip;
+}
+
+/*
+ * Copy an attri log format structure out of a log iovec into @dst_attr_fmt.
+ * The iovec must be exactly the size of the format structure; anything else
+ * is reported and rejected with -EFSCORRUPTED.
+ */
+STATIC int
+xfs_attri_copy_format(
+	struct xfs_log_iovec		*buf,
+	struct xfs_attri_log_format	*dst_attr_fmt)
+{
+	size_t				len = sizeof(struct xfs_attri_log_format);
+
+	if (buf->i_len != len) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+		return -EFSCORRUPTED;
+	}
+
+	memcpy(dst_attr_fmt, buf->i_addr, len);
+	return 0;
+}
+
+/* Convert a generic log item pointer into the attrd log item that embeds it. */
+static inline struct xfs_attrd_log_item *ATTRD_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_attrd_log_item, attrd_item);
+}
+
+/*
+ * Free an attrd log item: first the li_lv_shadow buffer hanging off its log
+ * item, then the item itself.
+ */
+STATIC void
+xfs_attrd_item_free(struct xfs_attrd_log_item *attrdp)
+{
+ kmem_free(attrdp->attrd_item.li_lv_shadow);
+ kmem_free(attrdp);
+}
+
+/*
+ * Add the space needed to log an attrd item to the caller's running totals:
+ * a single iovec holding the attrd format structure.
+ */
+STATIC void
+xfs_attrd_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_attrd_log_format);
+}
+
+/*
+ * Fill in the log iovecs for the given attrd log item. Only one iovec is
+ * needed; it carries the attrd format structure embedded in the attrd item.
+ */
+STATIC void
+xfs_attrd_item_format(
+	struct xfs_log_item		*lip,
+	struct xfs_log_vec		*lv)
+{
+	struct xfs_attrd_log_format	*fmt = &ATTRD_ITEM(lip)->attrd_format;
+	struct xfs_log_iovec		*vecp = NULL;
+
+	fmt->alfd_type = XFS_LI_ATTRD;
+	fmt->alfd_size = 1;
+
+	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRD_FORMAT, fmt,
+			sizeof(struct xfs_attrd_log_format));
+}
+
+/*
+ * The ATTRD is either committed or aborted if the transaction is canceled. If
+ * the transaction is canceled, drop our reference to the ATTRI and free the
+ * ATTRD.
+ */
+STATIC void
+xfs_attrd_item_release(
+ struct xfs_log_item *lip)
+{
+ struct xfs_attrd_log_item *attrdp = ATTRD_ITEM(lip);
+
+ /* Read attrd_attrip before the ATTRD that holds it is freed. */
+ xfs_attri_release(attrdp->attrd_attrip);
+ xfs_attrd_item_free(attrdp);
+}
+
+/* Return the log item of the ATTRI that this ATTRD points back to. */
+static struct xfs_log_item *
+xfs_attrd_item_intent(
+ struct xfs_log_item *lip)
+{
+ return &ATTRD_ITEM(lip)->attrd_attrip->attri_item;
+}
+
+/*
+ * Performs one step of an attribute update intent and marks the attrd item
+ * dirty. An attr operation may be a set or a remove. Note that the
+ * transaction is marked dirty regardless of whether the operation succeeds or
+ * fails to support the ATTRI/ATTRD lifecycle rules.
+ *
+ * @attr carries the da args and the current state of the attr state machine.
+ * @attrdp is the done item to mark dirty; it may be NULL.
+ *
+ * Returns 0 once the state machine has reached XFS_DAS_DONE, -EAGAIN when the
+ * step succeeded but more steps remain, -EIO when the XFS_ERRTAG_LARP error
+ * injection point fires, or the error returned by xfs_attr_set_iter().
+ */
+STATIC int
+xfs_xattri_finish_update(
+ struct xfs_attr_item *attr,
+ struct xfs_attrd_log_item *attrdp)
+{
+ struct xfs_da_args *args = attr->xattri_da_args;
+ int error;
+
+ /* Error injection point: fail before doing any work. */
+ if (XFS_TEST_ERROR(false, args->dp->i_mount, XFS_ERRTAG_LARP)) {
+ error = -EIO;
+ goto out;
+ }
+
+ error = xfs_attr_set_iter(attr);
+ /* A successful step that has not finished the operation must be rerun. */
+ if (!error && attr->xattri_dela_state != XFS_DAS_DONE)
+ error = -EAGAIN;
+out:
+ /*
+ * Mark the transaction dirty, even on error. This ensures the
+ * transaction is aborted, which:
+ *
+ * 1.) releases the ATTRI and frees the ATTRD
+ * 2.) shuts down the filesystem
+ */
+ args->trans->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
+
+ /*
+ * attr intent/done items are null when logged attributes are disabled
+ */
+ if (attrdp)
+ set_bit(XFS_LI_DIRTY, &attrdp->attrd_item.li_flags);
+
+ return error;
+}
+
+/*
+ * Log an attr to the intent item: mark the transaction and the intent dirty,
+ * then copy the operation described by @attr into the intent's log format
+ * structure and its name and value buffers.
+ */
+STATIC void
+xfs_attr_log_item(
+	struct xfs_trans		*tp,
+	struct xfs_attri_log_item	*attrip,
+	struct xfs_attr_item		*attr)
+{
+	struct xfs_da_args		*args = attr->xattri_da_args;
+	struct xfs_attri_log_format	*fmt = &attrip->attri_format;
+
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	set_bit(XFS_LI_DIRTY, &attrip->attri_item.li_flags);
+
+	/*
+	 * Both the xfs_attr_item and the log intent exist at this point, so
+	 * describe the operation in the log format structure...
+	 */
+	fmt->alfi_ino = args->dp->i_ino;
+	fmt->alfi_op_flags = attr->xattri_op_flags;
+	fmt->alfi_value_len = args->valuelen;
+	fmt->alfi_name_len = args->namelen;
+	fmt->alfi_attr_flags = args->attr_filter;
+
+	/* ...and copy the name and value into the intent's own buffers. */
+	memcpy(attrip->attri_name, args->name, args->namelen);
+	attrip->attri_name_len = args->namelen;
+	memcpy(attrip->attri_value, args->value, args->valuelen);
+	attrip->attri_value_len = args->valuelen;
+}
+
+/*
+ * Get an ATTRI for the single attr operation on @items and log it to @tp.
+ *
+ * Returns the new intent's log item, or NULL when the filesystem does not have
+ * the logged xattrs feature enabled, in which case the operation runs without
+ * an intent. NULL is also returned if @items is empty or the intent could not
+ * be allocated. @sort is not used.
+ */
+static struct xfs_log_item *
+xfs_attr_create_intent(
+	struct xfs_trans		*tp,
+	struct list_head		*items,
+	unsigned int			count,
+	bool				sort)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_attri_log_item	*attrip;
+	struct xfs_attr_item		*attr;
+
+	ASSERT(count == 1);
+
+	if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
+		return NULL;
+
+	/*
+	 * Each attr item only performs one attribute operation at a time, so
+	 * this is a list of one. Take that entry explicitly rather than
+	 * looping, so that an empty list cannot lead to returning a pointer
+	 * derived from an uninitialized attrip.
+	 */
+	attr = list_first_entry_or_null(items, struct xfs_attr_item,
+			xattri_list);
+	if (!attr)
+		return NULL;
+
+	attrip = xfs_attri_init(mp, attr->xattri_da_args->namelen,
+			attr->xattri_da_args->valuelen);
+	if (attrip == NULL)
+		return NULL;
+
+	xfs_trans_add_item(tp, &attrip->attri_item);
+	xfs_attr_log_item(tp, attrip, attr);
+
+	return &attrip->attri_item;
+}
+
+/*
+ * Process an attr: run one step of the deferred attr operation in @tp. The
+ * attr item is freed unless the step returns -EAGAIN, in which case it is
+ * kept for the next pass.
+ */
+STATIC int
+xfs_attr_finish_item(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*done,
+	struct list_head		*item,
+	struct xfs_btree_cur		**state)
+{
+	struct xfs_attr_item		*attr =
+		container_of(item, struct xfs_attr_item, xattri_list);
+	struct xfs_attrd_log_item	*attrdp = done ? ATTRD_ITEM(done) : NULL;
+	int				error;
+
+	/*
+	 * The transaction is new after every EAGAIN cycle, so always point the
+	 * args at the one we were handed.
+	 */
+	attr->xattri_da_args->trans = tp;
+
+	error = xfs_xattri_finish_update(attr, attrdp);
+	if (error == -EAGAIN)
+		return error;
+
+	kmem_free(attr);
+	return error;
+}
+
+/* Abort all pending ATTRs by dropping one reference to the intent. */
+STATIC void
+xfs_attr_abort_intent(
+ struct xfs_log_item *intent)
+{
+ xfs_attri_release(ATTRI_ITEM(intent));
+}
+
+/* Cancel an attr: free the xfs_attr_item that embeds this list head. */
+STATIC void
+xfs_attr_cancel_item(
+	struct list_head		*item)
+{
+	kmem_free(container_of(item, struct xfs_attr_item, xattri_list));
+}
+
+/*
+ * Called once the ATTRI has been committed: clear the item's name and value
+ * pointers and return @lsn unchanged.
+ */
+STATIC xfs_lsn_t
+xfs_attri_item_committed(
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+ struct xfs_attri_log_item *attrip = ATTRI_ITEM(lip);
+
+ /*
+ * The attrip refers to xfs_attr_item memory to log the name and value
+ * with the intent item. This already occurred when the intent was
+ * committed so these fields are no longer accessed. Clear them out of
+ * caution since we're about to free the xfs_attr_item.
+ *
+ * NOTE(review): xfs_attri_init() points attri_name/attri_value at the
+ * item's own trailing buffer, not at xfs_attr_item memory, so the
+ * paragraph above looks stale - confirm that nothing reads these
+ * pointers after commit.
+ */
+ attrip->attri_name = NULL;
+ attrip->attri_value = NULL;
+
+ /*
+ * The ATTRI is logged only once and cannot be moved in the log, so
+ * simply return the lsn at which it's been logged.
+ */
+ return lsn;
+}
+
+/* Does this ATTRI carry the given intent id in its log format structure? */
+STATIC bool
+xfs_attri_item_match(
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ return ATTRI_ITEM(lip)->attri_format.alfi_id == intent_id;
+}
+
+/*
+ * Is this recovered ATTRI format ok? The padding must be zero, the operation
+ * type must be known, the name and value lengths must be within the xattr
+ * limits (the name must not be empty), and the inode number must be valid for
+ * this mount.
+ */
+static inline bool
+xfs_attri_validate(
+	struct xfs_mount		*mp,
+	struct xfs_attri_log_format	*attrp)
+{
+	if (attrp->__pad != 0)
+		return false;
+
+	/* the type code in alfi_op_flags must be a set, replace or remove */
+	switch (attrp->alfi_op_flags & XFS_ATTR_OP_FLAGS_TYPE_MASK) {
+	case XFS_ATTR_OP_FLAGS_SET:
+	case XFS_ATTR_OP_FLAGS_REPLACE:
+	case XFS_ATTR_OP_FLAGS_REMOVE:
+		break;
+	default:
+		return false;
+	}
+
+	if (attrp->alfi_value_len > XATTR_SIZE_MAX)
+		return false;
+
+	if (attrp->alfi_name_len == 0 ||
+	    attrp->alfi_name_len > XATTR_NAME_MAX)
+		return false;
+
+	return xfs_verify_ino(mp, attrp->alfi_ino);
+}
+
+/*
+ * Process an attr intent item that was recovered from the log by replaying
+ * the set, replace or remove operation that it describes.
+ *
+ * The first step of the operation runs in a new transaction here. If more
+ * steps remain, the attr item is queued as deferred work on that transaction
+ * and captured on @capture_list by the commit.
+ *
+ * Returns 0 on success (including a remove against an inode that has no
+ * attrs, which is a no-op), -EFSCORRUPTED if the recovered intent fails
+ * validation, or the error from the inode lookup, transaction allocation,
+ * attr update or commit.
+ */
+STATIC int
+xfs_attri_item_recover(
+	struct xfs_log_item		*lip,
+	struct list_head		*capture_list)
+{
+	struct xfs_attri_log_item	*attrip = ATTRI_ITEM(lip);
+	struct xfs_attr_item		*attr;
+	struct xfs_mount		*mp = lip->li_log->l_mp;
+	struct xfs_inode		*ip;
+	struct xfs_da_args		*args;
+	struct xfs_trans		*tp;
+	struct xfs_trans_res		tres;
+	struct xfs_attri_log_format	*attrp;
+	int				error, ret = 0;
+	int				total;
+	int				local;
+	struct xfs_attrd_log_item	*done_item = NULL;
+
+	/*
+	 * First check the validity of the attr described by the ATTRI. If any
+	 * are bad, then assume that all are bad and just toss the ATTRI.
+	 */
+	attrp = &attrip->attri_format;
+	if (!xfs_attri_validate(mp, attrp) ||
+	    !xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len))
+		return -EFSCORRUPTED;
+
+	error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
+	if (error)
+		return error;
+
+	/* The attr item and its da args share a single allocation. */
+	attr = kmem_zalloc(sizeof(struct xfs_attr_item) +
+			   sizeof(struct xfs_da_args), KM_NOFS);
+	args = (struct xfs_da_args *)(attr + 1);
+
+	attr->xattri_da_args = args;
+	attr->xattri_op_flags = attrp->alfi_op_flags;
+
+	args->dp = ip;
+	args->geo = mp->m_attr_geo;
+	args->whichfork = XFS_ATTR_FORK;
+	args->name = attrip->attri_name;
+	args->namelen = attrp->alfi_name_len;
+	args->hashval = xfs_da_hashname(args->name, args->namelen);
+	args->attr_filter = attrp->alfi_attr_flags;
+	args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT;
+
+	switch (attrp->alfi_op_flags & XFS_ATTR_OP_FLAGS_TYPE_MASK) {
+	case XFS_ATTR_OP_FLAGS_SET:
+	case XFS_ATTR_OP_FLAGS_REPLACE:
+		args->value = attrip->attri_value;
+		args->valuelen = attrp->alfi_value_len;
+		args->total = xfs_attr_calc_size(args, &local);
+		if (xfs_inode_hasattr(args->dp))
+			attr->xattri_dela_state = xfs_attr_init_replace_state(args);
+		else
+			attr->xattri_dela_state = xfs_attr_init_add_state(args);
+		break;
+	case XFS_ATTR_OP_FLAGS_REMOVE:
+		/* Nothing to remove; still drop the inode reference. */
+		if (!xfs_inode_hasattr(args->dp))
+			goto out_rele;
+		attr->xattri_dela_state = xfs_attr_init_remove_state(args);
+		break;
+	default:
+		ASSERT(0);
+		error = -EFSCORRUPTED;
+		goto out_rele;
+	}
+
+	xfs_init_attr_trans(args, &tres, &total);
+	error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp);
+	if (error)
+		goto out_rele;
+
+	args->trans = tp;
+	done_item = xfs_trans_get_attrd(tp, attrip);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, 0);
+
+	ret = xfs_xattri_finish_update(attr, done_item);
+	if (ret == -EAGAIN) {
+		/* There's more work to do, so add it to this transaction */
+		xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
+	} else
+		error = ret;
+
+	if (error) {
+		xfs_trans_cancel(tp);
+		goto out_unlock;
+	}
+
+	error = xfs_defer_ops_capture_and_commit(tp, capture_list);
+
+out_unlock:
+	if (attr->xattri_leaf_bp)
+		xfs_buf_relse(attr->xattri_leaf_bp);
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_rele:
+	/*
+	 * Every exit taken after xlog_recover_iget() succeeded must drop the
+	 * inode reference, including the ones that never locked the inode.
+	 */
+	xfs_irele(ip);
+	/* On -EAGAIN the attr item was handed to the deferred ops list. */
+	if (ret != -EAGAIN)
+		kmem_free(attr);
+	return error;
+}
+
+/*
+ * Re-log an intent item to push the log tail forward: log an ATTRD against
+ * the existing ATTRI, then build and return a new ATTRI that carries the same
+ * inode, operation flags, name and value.
+ */
+static struct xfs_log_item *
+xfs_attri_item_relog(
+	struct xfs_log_item		*intent,
+	struct xfs_trans		*tp)
+{
+	struct xfs_attri_log_item	*src = ATTRI_ITEM(intent);
+	struct xfs_attri_log_format	*src_fmt = &src->attri_format;
+	struct xfs_attri_log_item	*dst;
+	struct xfs_attri_log_format	*dst_fmt;
+	struct xfs_attrd_log_item	*done;
+
+	/* Mark the old intent as done in this transaction. */
+	tp->t_flags |= XFS_TRANS_DIRTY;
+	done = xfs_trans_get_attrd(tp, src);
+	set_bit(XFS_LI_DIRTY, &done->attrd_item.li_flags);
+
+	/* Build the replacement intent with buffers of the same sizes. */
+	dst = xfs_attri_init(tp->t_mountp, src_fmt->alfi_name_len,
+			     src_fmt->alfi_value_len);
+	dst_fmt = &dst->attri_format;
+
+	dst_fmt->alfi_ino = src_fmt->alfi_ino;
+	dst_fmt->alfi_op_flags = src_fmt->alfi_op_flags;
+	dst_fmt->alfi_value_len = src_fmt->alfi_value_len;
+	dst_fmt->alfi_name_len = src_fmt->alfi_name_len;
+	dst_fmt->alfi_attr_flags = src_fmt->alfi_attr_flags;
+
+	memcpy(dst->attri_name, src->attri_name, dst->attri_name_len);
+
+	/* A remove carries no value, so only copy one when it exists. */
+	if (dst->attri_value_len > 0)
+		memcpy(dst->attri_value, src->attri_value,
+		       dst->attri_value_len);
+
+	xfs_trans_add_item(tp, &dst->attri_item);
+	set_bit(XFS_LI_DIRTY, &dst->attri_item.li_flags);
+
+	return &dst->attri_item;
+}
+
+/*
+ * Recover an ATTRI found in a committed transaction in the log.  The first
+ * region holds the xfs_attri_log_format, the next holds the attr name and,
+ * when the format records a non-zero value length, a further region holds
+ * the attr value.  A validated in-core ATTRI is built from those regions and
+ * inserted into the AIL at @lsn.
+ *
+ * Returns 0 on success, -EFSCORRUPTED if any region is too short or fails
+ * validation, or -ENOMEM if the ATTRI cannot be allocated.
+ */
+STATIC int
+xlog_recover_attri_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	int				error;
+	struct xfs_mount		*mp = log->l_mp;
+	struct xfs_attri_log_item	*attrip;
+	struct xfs_attri_log_format	*attri_formatp;
+	int				region = 0;
+
+	/* Don't look inside the format region unless all of it is there. */
+	if (item->ri_buf[region].i_len < sizeof(struct xfs_attri_log_format)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
+	attri_formatp = item->ri_buf[region].i_addr;
+
+	/* Validate xfs_attri_log_format */
+	if (!xfs_attri_validate(mp, attri_formatp)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		return -EFSCORRUPTED;
+	}
+
+	/* memory alloc failure will cause replay to abort */
+	attrip = xfs_attri_init(mp, attri_formatp->alfi_name_len,
+				attri_formatp->alfi_value_len);
+	if (attrip == NULL)
+		return -ENOMEM;
+
+	error = xfs_attri_copy_format(&item->ri_buf[region],
+				      &attrip->attri_format);
+	if (error)
+		goto out;
+
+	/* The name region must hold at least as many bytes as we copy. */
+	region++;
+	if (item->ri_buf[region].i_len < attrip->attri_name_len) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		error = -EFSCORRUPTED;
+		goto out;
+	}
+	memcpy(attrip->attri_name, item->ri_buf[region].i_addr,
+	       attrip->attri_name_len);
+
+	if (!xfs_attr_namecheck(attrip->attri_name, attrip->attri_name_len)) {
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+		error = -EFSCORRUPTED;
+		goto out;
+	}
+
+	if (attrip->attri_value_len > 0) {
+		/* Likewise for the value region. */
+		region++;
+		if (item->ri_buf[region].i_len < attrip->attri_value_len) {
+			XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+			error = -EFSCORRUPTED;
+			goto out;
+		}
+		memcpy(attrip->attri_value, item->ri_buf[region].i_addr,
+		       attrip->attri_value_len);
+	}
+
+	/*
+	 * The ATTRI has two references. One for the ATTRD and one for ATTRI to
+	 * ensure it makes it into the AIL. Insert the ATTRI into the AIL
+	 * directly and drop the ATTRI reference.
+	 */
+	xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
+	xfs_attri_release(attrip);
+	return 0;
+out:
+	xfs_attri_item_free(attrip);
+	return error;
+}
+
+/*
+ * Allocate an "attr done" (ATTRD) log item for the given ATTRI, record the
+ * intent's id in it and add it to the transaction.
+ */
+static struct xfs_attrd_log_item *
+xfs_trans_get_attrd(struct xfs_trans		*tp,
+		  struct xfs_attri_log_item	*attrip)
+{
+	struct xfs_attrd_log_item		*done;
+
+	ASSERT(tp != NULL);
+
+	done = kmem_cache_zalloc(xfs_attrd_cache, GFP_NOFS | __GFP_NOFAIL);
+	xfs_log_item_init(tp->t_mountp, &done->attrd_item, XFS_LI_ATTRD,
+			  &xfs_attrd_item_ops);
+
+	/* Tie the done item to the intent it completes. */
+	done->attrd_attrip = attrip;
+	done->attrd_format.alfd_alf_id = attrip->attri_format.alfi_id;
+
+	xfs_trans_add_item(tp, &done->attrd_item);
+	return done;
+}
+
+/*
+ * Get an ATTRD for the given intent so we can process the attr.  Returns NULL
+ * when there is no intent; @count is not used.
+ */
+static struct xfs_log_item *
+xfs_attr_create_done(
+	struct xfs_trans		*tp,
+	struct xfs_log_item		*intent,
+	unsigned int			count)
+{
+	struct xfs_attrd_log_item	*done;
+
+	if (!intent)
+		return NULL;
+
+	done = xfs_trans_get_attrd(tp, ATTRI_ITEM(intent));
+	return &done->attrd_item;
+}
+
+/*
+ * Deferred operation callbacks for extended attribute updates.  max_items is
+ * 1, which presumably limits each intent to a single attr operation -- confirm
+ * against the xfs_defer_op_type documentation.
+ */
+const struct xfs_defer_op_type xfs_attr_defer_type = {
+ .max_items = 1,
+ .create_intent = xfs_attr_create_intent,
+ .abort_intent = xfs_attr_abort_intent,
+ .create_done = xfs_attr_create_done,
+ .finish_item = xfs_attr_finish_item,
+ .cancel_item = xfs_attr_cancel_item,
+};
+
+/*
+ * This routine is called when an ATTRD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding ATTRI if
+ * it was still in the log. To do this it searches the AIL for the ATTRI with
+ * an id equal to that in the ATTRD format structure. If we find it we drop
+ * the ATTRD reference, which removes the ATTRI from the AIL and frees it.
+ *
+ * Returns 0 on success or -EFSCORRUPTED if the recovered region is not exactly
+ * the size of a struct xfs_attrd_log_format.
+ */
+STATIC int
+xlog_recover_attrd_commit_pass2(
+	struct xlog			*log,
+	struct list_head		*buffer_list,
+	struct xlog_recover_item	*item,
+	xfs_lsn_t			lsn)
+{
+	struct xfs_attrd_log_format	*attrd_formatp;
+
+	attrd_formatp = item->ri_buf[0].i_addr;
+	if (item->ri_buf[0].i_len != sizeof(struct xfs_attrd_log_format)) {
+		/* Pass the mount so the report names the affected filesystem. */
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+		return -EFSCORRUPTED;
+	}
+
+	xlog_recover_release_intent(log, XFS_LI_ATTRI,
+				    attrd_formatp->alfd_alf_id);
+	return 0;
+}
+
+/* Log item operations for the attr intent (ATTRI) item. */
+static const struct xfs_item_ops xfs_attri_item_ops = {
+ .flags = XFS_ITEM_INTENT,
+ .iop_size = xfs_attri_item_size,
+ .iop_format = xfs_attri_item_format,
+ .iop_unpin = xfs_attri_item_unpin,
+ .iop_committed = xfs_attri_item_committed,
+ .iop_release = xfs_attri_item_release,
+ .iop_recover = xfs_attri_item_recover,
+ .iop_match = xfs_attri_item_match,
+ .iop_relog = xfs_attri_item_relog,
+};
+
+/* Log recovery operations for XFS_LI_ATTRI items found in the log. */
+const struct xlog_recover_item_ops xlog_attri_item_ops = {
+ .item_type = XFS_LI_ATTRI,
+ .commit_pass2 = xlog_recover_attri_commit_pass2,
+};
+
+/* Log item operations for the attr done (ATTRD) item. */
+static const struct xfs_item_ops xfs_attrd_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
+ XFS_ITEM_INTENT_DONE,
+ .iop_size = xfs_attrd_item_size,
+ .iop_format = xfs_attrd_item_format,
+ .iop_release = xfs_attrd_item_release,
+ .iop_intent = xfs_attrd_item_intent,
+};
+
+/* Log recovery operations for XFS_LI_ATTRD items found in the log. */
+const struct xlog_recover_item_ops xlog_attrd_item_ops = {
+ .item_type = XFS_LI_ATTRD,
+ .commit_pass2 = xlog_recover_attrd_commit_pass2,
+};
diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h
new file mode 100644
index 000000000000..c3b779f82adb
--- /dev/null
+++ b/fs/xfs/xfs_attr_item.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Copyright (C) 2022 Oracle. All Rights Reserved.
+ * Author: Allison Henderson <allison.henderson@oracle.com>
+ */
+#ifndef __XFS_ATTR_ITEM_H__
+#define __XFS_ATTR_ITEM_H__
+
+/* kernel only ATTRI/ATTRD definitions */
+
+struct xfs_mount;
+struct kmem_zone;
+
+/*
+ * This is the "attr intention" log item. It is used to log the fact that some
+ * extended attribute operations need to be processed. An operation is
+ * currently a set, replace or remove. These operations are described by
+ * the xfs_attr_item which may be logged to this intent.
+ *
+ * During a normal attr operation, name and value point to the name and value
+ * fields of the caller's xfs_da_args structure. During a recovery, the name
+ * and value buffers are copied from the log, and stored in a trailing buffer
+ * attached to the xfs_attr_item until they are committed. They are freed when
+ * the xfs_attr_item itself is freed when the work is done.
+ */
+struct xfs_attri_log_item {
+ struct xfs_log_item attri_item;
+ /* reference count; log recovery notes one ref for the ATTRI, one for the ATTRD */
+ atomic_t attri_refcount;
+ /* number of bytes in attri_name and attri_value respectively */
+ int attri_name_len;
+ int attri_value_len;
+ /* attr name and value buffers */
+ void *attri_name;
+ void *attri_value;
+ /* format structure for this intent; recovery fills it from the log */
+ struct xfs_attri_log_format attri_format;
+};
+
+/*
+ * This is the "attr done" log item. It is used to log the fact that the attr
+ * operation mentioned earlier in an attri item has been processed.
+ */
+struct xfs_attrd_log_item {
+ struct xfs_log_item attrd_item;
+ /* the intent item that this done item completes */
+ struct xfs_attri_log_item *attrd_attrip;
+ /* format structure; alfd_alf_id carries the id of the matching ATTRI */
+ struct xfs_attrd_log_format attrd_format;
+};
+
+#endif /* __XFS_ATTR_ITEM_H__ */
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 2d1e5134cebe..90a14e85e76d 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -15,6 +15,7 @@
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_sf.h"
#include "xfs_attr_leaf.h"
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 749fd18c4f32..296faa41d81d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -57,6 +57,9 @@ static unsigned int xfs_errortag_random_default[] = {
XFS_RANDOM_REDUCE_MAX_IEXTENTS,
XFS_RANDOM_BMAP_ALLOC_MINLEN_EXTENT,
XFS_RANDOM_AG_RESV_FAIL,
+ XFS_RANDOM_LARP,
+ XFS_RANDOM_DA_LEAF_SPLIT,
+ XFS_RANDOM_ATTR_LEAF_TO_NODE,
};
struct xfs_errortag_attr {
@@ -170,6 +173,9 @@ XFS_ERRORTAG_ATTR_RW(buf_ioerror, XFS_ERRTAG_BUF_IOERROR);
XFS_ERRORTAG_ATTR_RW(reduce_max_iextents, XFS_ERRTAG_REDUCE_MAX_IEXTENTS);
XFS_ERRORTAG_ATTR_RW(bmap_alloc_minlen_extent, XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT);
XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL);
+XFS_ERRORTAG_ATTR_RW(larp, XFS_ERRTAG_LARP);
+XFS_ERRORTAG_ATTR_RW(da_leaf_split, XFS_ERRTAG_DA_LEAF_SPLIT);
+XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node, XFS_ERRTAG_ATTR_LEAF_TO_NODE);
static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -211,6 +217,9 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(reduce_max_iextents),
XFS_ERRORTAG_ATTR_LIST(bmap_alloc_minlen_extent),
XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
+ XFS_ERRORTAG_ATTR_LIST(larp),
+ XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
+ XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
NULL,
};
ATTRIBUTE_GROUPS(xfs_errortag);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index f62fa652c2fd..4d0a98f920ca 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -41,5 +41,6 @@ struct xfs_globals xfs_globals = {
#endif
#ifdef DEBUG
.pwork_threads = -1, /* automatic thread detection */
+ .larp = false, /* log attribute replay */
#endif
};
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index e9eadc7337ce..0e5cb7936206 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -15,6 +15,8 @@
#include "xfs_iwalk.h"
#include "xfs_itable.h"
#include "xfs_error.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
@@ -35,8 +37,6 @@
#include "xfs_health.h"
#include "xfs_reflink.h"
#include "xfs_ioctl.h"
-#include "xfs_da_format.h"
-#include "xfs_da_btree.h"
#include <linux/mount.h>
#include <linux/namei.h>
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index ca25ed89b706..2f54b701eead 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -17,6 +17,8 @@
#include "xfs_itable.h"
#include "xfs_fsops.h"
#include "xfs_rtalloc.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_ioctl.h"
#include "xfs_ioctl32.h"
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 94313b7e9991..e912b7fee714 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -13,6 +13,8 @@
#include "xfs_inode.h"
#include "xfs_acl.h"
#include "xfs_quota.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_trans.h"
#include "xfs_trace.h"
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1e972f884a81..9dc748abdf33 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3877,3 +3877,44 @@ xlog_drop_incompat_feat(
{
up_read(&log->l_incompat_users);
}
+
+/*
+ * Get permission to use log-assisted (logged) extended attribute updates.
+ *
+ * Callers must not be running any transactions or hold any inode locks, and
+ * they must release the permission by calling xlog_drop_incompat_feat
+ * when they're done.
+ *
+ * Returns 0 with the log incompat reference held, or a negative errno with
+ * the reference already dropped.
+ */
+int
+xfs_attr_use_log_assist(
+	struct xfs_mount	*mp)
+{
+	int			error;
+
+	/*
+	 * Protect ourselves from an idle log clearing the logged xattrs log
+	 * incompat feature bit.
+	 */
+	xlog_use_incompat_feat(mp->m_log);
+
+	/*
+	 * If log-assisted xattrs are already enabled, the caller can use the
+	 * logged xattr code with the log-incompat reference we got.
+	 */
+	if (xfs_sb_version_haslogxattrs(&mp->m_sb))
+		return 0;
+
+	/* Enable log-assisted xattrs; give the reference back on failure. */
+	error = xfs_add_incompat_log_feature(mp,
+			XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
+	if (error) {
+		xlog_drop_incompat_feat(mp->m_log);
+		return error;
+	}
+
+	xfs_warn_once(mp,
+"EXPERIMENTAL logged extended attributes feature added. Use at your own risk!");
+
+	return 0;
+}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3a4f6a4e4eb7..252b098cde1f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -153,5 +153,6 @@ bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags);
void xlog_use_incompat_feat(struct xlog *log);
void xlog_drop_incompat_feat(struct xlog *log);
+int xfs_attr_use_log_assist(struct xfs_mount *mp);
#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 70f718d76ceb..6ca6fe8f2747 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -135,39 +135,6 @@ xlog_cil_iovec_space(
}
/*
- * shadow buffers can be large, so we need to use kvmalloc() here to ensure
- * success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts to fall
- * back to vmalloc, so we can't actually do anything useful with gfp flags to
- * control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() will do
- * direct reclaim and compaction in the slow path, both of which are
- * horrendously expensive. We just want kmalloc to fail fast and fall back to
- * vmalloc if it can't get somethign straight away from the free lists or buddy
- * allocator. Hence we have to open code kvmalloc outselves here.
- *
- * Also, we are in memalloc_nofs_save task context here, so despite the use of
- * GFP_KERNEL here, we are actually going to be doing GFP_NOFS allocations. This
- * is actually the only way to make vmalloc() do GFP_NOFS allocations, so lets
- * just all pretend this is a GFP_KERNEL context operation....
- */
-static inline void *
-xlog_cil_kvmalloc(
- size_t buf_size)
-{
- gfp_t flags = GFP_KERNEL;
- void *p;
-
- flags &= ~__GFP_DIRECT_RECLAIM;
- flags |= __GFP_NOWARN | __GFP_NORETRY;
- do {
- p = kmalloc(buf_size, flags);
- if (!p)
- p = vmalloc(buf_size);
- } while (!p);
-
- return p;
-}
-
-/*
* Allocate or pin log vector buffers for CIL insertion.
*
* The CIL currently uses disposable buffers for copying a snapshot of the
@@ -283,7 +250,7 @@ xlog_cil_alloc_shadow_bufs(
* storage.
*/
kmem_free(lip->li_lv_shadow);
- lv = xlog_cil_kvmalloc(buf_size);
+ lv = xlog_kvmalloc(buf_size);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4f7e844d28ad..67fd9789e69a 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -651,4 +651,38 @@ xlog_valid_lsn(
return valid;
}
+/*
+ * Log vector and shadow buffers can be large, so we need to use kvmalloc() here
+ * to ensure success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts
+ * to fall back to vmalloc, so we can't actually do anything useful with gfp
+ * flags to control the kmalloc() behaviour within kvmalloc(). Hence kmalloc()
+ * will do direct reclaim and compaction in the slow path, both of which are
+ * horrendously expensive. We just want kmalloc to fail fast and fall back to
+ * vmalloc if it can't get something straight away from the free lists or
+ * buddy allocator. Hence we have to open code kvmalloc ourselves here.
+ *
+ * This assumes that the caller uses memalloc_nofs_save task context here, so
+ * despite the use of GFP_KERNEL here, we are going to be doing GFP_NOFS
+ * allocations. This is actually the only way to make vmalloc() do GFP_NOFS
+ * allocations, so let's just all pretend this is a GFP_KERNEL context
+ * operation....
+ *
+ * This never returns NULL: it keeps alternating between kmalloc() and
+ * vmalloc() until one of them succeeds.
+ */
+static inline void *
+xlog_kvmalloc(
+	size_t		buf_size)
+{
+	const gfp_t	kmalloc_flags = (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
+					__GFP_NOWARN | __GFP_NORETRY;
+	void		*buf;
+
+	for (;;) {
+		/* Cheap attempt first: no direct reclaim, no retries. */
+		buf = kmalloc(buf_size, kmalloc_flags);
+		if (buf)
+			return buf;
+
+		buf = vmalloc(buf_size);
+		if (buf)
+			return buf;
+	}
+}
+
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c4ad4296c540..97b941c07957 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1800,6 +1800,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
&xlog_cud_item_ops,
&xlog_bui_item_ops,
&xlog_bud_item_ops,
+ &xlog_attri_item_ops,
+ &xlog_attrd_item_ops,
};
static const struct xlog_recover_item_ops *
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 25991923c1a8..758702b9495f 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -132,6 +132,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16);
/*
* The v5 superblock format extended several v4 header structures with
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index 7692e76ead33..f78ad6b10ea5 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -83,6 +83,7 @@ extern xfs_param_t xfs_params;
struct xfs_globals {
#ifdef DEBUG
int pwork_threads; /* parallel workqueue threads */
+ bool larp; /* log attribute replay */
#endif
int log_recovery_delay; /* log recovery delay (secs) */
int mount_delay; /* mount setup delay (secs) */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 574b80c29fe1..f7faf6e70d7f 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -228,6 +228,29 @@ pwork_threads_show(
return sysfs_emit(buf, "%d\n", xfs_globals.pwork_threads);
}
XFS_SYSFS_ATTR_RW(pwork_threads);
+
+/*
+ * Parse a boolean from @buf into xfs_globals.larp.  Returns @count when the
+ * input was accepted, or the negative error from kstrtobool().
+ */
+static ssize_t
+larp_store(
+	struct kobject	*kobject,
+	const char	*buf,
+	size_t		count)
+{
+	ssize_t		error;
+
+	error = kstrtobool(buf, &xfs_globals.larp);
+	if (error >= 0)
+		return count;
+	return error;
+}
+
+/*
+ * Print the current value of xfs_globals.larp followed by a newline.  Uses
+ * sysfs_emit() like the neighbouring pwork_threads_show().
+ */
+STATIC ssize_t
+larp_show(
+	struct kobject	*kobject,
+	char		*buf)
+{
+	return sysfs_emit(buf, "%d\n", xfs_globals.larp);
+}
+XFS_SYSFS_ATTR_RW(larp);
#endif /* DEBUG */
static struct attribute *xfs_dbg_attrs[] = {
@@ -237,6 +260,7 @@ static struct attribute *xfs_dbg_attrs[] = {
ATTR_LIST(always_cow),
#ifdef DEBUG
ATTR_LIST(pwork_threads),
+ ATTR_LIST(larp),
#endif
NULL,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e19a3f7351be..d32026585c1b 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4129,6 +4129,27 @@ DEFINE_ICLOG_EVENT(xlog_iclog_want_sync);
DEFINE_ICLOG_EVENT(xlog_iclog_wait_on);
DEFINE_ICLOG_EVENT(xlog_iclog_write);
+TRACE_DEFINE_ENUM(XFS_DAS_UNINIT);
+TRACE_DEFINE_ENUM(XFS_DAS_SF_ADD);
+TRACE_DEFINE_ENUM(XFS_DAS_SF_REMOVE);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_ADD);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_ADD);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_SET_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_ALLOC_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REPLACE);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_OLD);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_LEAF_REMOVE_ATTR);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_SET_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_ALLOC_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_REPLACE);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_OLD);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_RMT);
+TRACE_DEFINE_ENUM(XFS_DAS_NODE_REMOVE_ATTR);
+TRACE_DEFINE_ENUM(XFS_DAS_DONE);
+
DECLARE_EVENT_CLASS(xfs_das_state_class,
TP_PROTO(int das, struct xfs_inode *ip),
TP_ARGS(das, ip),
@@ -4140,8 +4161,9 @@ DECLARE_EVENT_CLASS(xfs_das_state_class,
__entry->das = das;
__entry->ino = ip->i_ino;
),
- TP_printk("state change %d ino 0x%llx",
- __entry->das, __entry->ino)
+ TP_printk("state change %s ino 0x%llx",
+ __print_symbolic(__entry->das, XFS_DAS_STRINGS),
+ __entry->ino)
)
#define DEFINE_DAS_STATE_EVENT(name) \
@@ -4150,9 +4172,15 @@ DEFINE_EVENT(xfs_das_state_class, name, \
TP_ARGS(das, ip))
DEFINE_DAS_STATE_EVENT(xfs_attr_sf_addname_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_set_iter_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_leaf_addname_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_node_addname_return);
DEFINE_DAS_STATE_EVENT(xfs_attr_remove_iter_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_alloc);
DEFINE_DAS_STATE_EVENT(xfs_attr_rmtval_remove_return);
+DEFINE_DAS_STATE_EVENT(xfs_attr_defer_add);
+DEFINE_DAS_STATE_EVENT(xfs_attr_defer_replace);
+DEFINE_DAS_STATE_EVENT(xfs_attr_defer_remove);
+
TRACE_EVENT(xfs_force_shutdown,
TP_PROTO(struct xfs_mount *mp, int ptag, int flags, const char *fname,
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 0d050f8829ef..7a044afd4c46 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -12,9 +12,9 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
+#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_acl.h"
-#include "xfs_da_btree.h"
#include <linux/posix_acl_xattr.h>