summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/libxfs/xfs_attr.c2
-rw-r--r--fs/xfs/xfs_inode.c14
-rw-r--r--fs/xfs/xfs_iops.c11
-rw-r--r--fs/xfs/xfs_iops.h3
-rw-r--r--fs/xfs/xfs_log.c55
-rw-r--r--fs/xfs/xfs_log.h3
-rw-r--r--fs/xfs/xfs_log_cil.c472
-rw-r--r--fs/xfs/xfs_log_priv.h58
-rw-r--r--fs/xfs/xfs_super.c1
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans.h1
-rw-r--r--fs/xfs/xfs_trans_priv.h3
12 files changed, 468 insertions, 159 deletions
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 224649a76cbb..6b8857e53add 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1558,7 +1558,7 @@ xfs_attr_node_get(
* If not in a transaction, we have to release all the buffers.
*/
out_release:
- for (i = 0; state != NULL && i < state->path.active; i++) {
+ for (i = 0; i < state->path.active; i++) {
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ace537a17d0d..622953791ddd 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2712,10 +2712,12 @@ out_trans_abort:
static int
xfs_rename_alloc_whiteout(
struct user_namespace *mnt_userns,
+ struct xfs_name *src_name,
struct xfs_inode *dp,
struct xfs_inode **wip)
{
struct xfs_inode *tmpfile;
+ struct qstr name;
int error;
error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
@@ -2723,6 +2725,15 @@ xfs_rename_alloc_whiteout(
if (error)
return error;
+ name.name = src_name->name;
+ name.len = src_name->len;
+ error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name);
+ if (error) {
+ xfs_finish_inode_setup(tmpfile);
+ xfs_irele(tmpfile);
+ return error;
+ }
+
/*
* Prepare the tmpfile inode as if it were created through the VFS.
* Complete the inode setup and flag it as linkable. nlink is already
@@ -2773,7 +2784,8 @@ xfs_rename(
* appropriately.
*/
if (flags & RENAME_WHITEOUT) {
- error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
+ error = xfs_rename_alloc_whiteout(mnt_userns, src_name,
+ target_dp, &wip);
if (error)
return error;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 29f5b8b8aca6..6720b60f88cf 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -75,9 +75,8 @@ xfs_initxattrs(
* these attrs can be journalled at inode creation time (along with the
* inode, of course, such that log replay can't cause these to be lost).
*/
-
-STATIC int
-xfs_init_security(
+int
+xfs_inode_init_security(
struct inode *inode,
struct inode *dir,
const struct qstr *qstr)
@@ -122,7 +121,7 @@ xfs_cleanup_inode(
/* Oh, the horror.
* If we can't add the ACL or we fail in
- * xfs_init_security we must back out.
+ * xfs_inode_init_security we must back out.
* ENOSPC can hit here, among other things.
*/
xfs_dentry_to_name(&teardown, dentry);
@@ -208,7 +207,7 @@ xfs_generic_create(
inode = VFS_I(ip);
- error = xfs_init_security(inode, dir, &dentry->d_name);
+ error = xfs_inode_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
@@ -424,7 +423,7 @@ xfs_vn_symlink(
inode = VFS_I(cip);
- error = xfs_init_security(inode, dir, &dentry->d_name);
+ error = xfs_inode_init_security(inode, dir, &dentry->d_name);
if (unlikely(error))
goto out_cleanup_inode;
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 278949056048..cb5fc68c9ea0 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -17,4 +17,7 @@ extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
struct dentry *dentry, struct iattr *vap);
+int xfs_inode_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr);
+
#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ae904b21e9cc..498cbb49392b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -57,7 +57,8 @@ xlog_grant_push_ail(
STATIC void
xlog_sync(
struct xlog *log,
- struct xlog_in_core *iclog);
+ struct xlog_in_core *iclog,
+ struct xlog_ticket *ticket);
#if defined(DEBUG)
STATIC void
xlog_verify_grant_tail(
@@ -567,7 +568,8 @@ xlog_state_shutdown_callbacks(
int
xlog_state_release_iclog(
struct xlog *log,
- struct xlog_in_core *iclog)
+ struct xlog_in_core *iclog,
+ struct xlog_ticket *ticket)
{
xfs_lsn_t tail_lsn;
bool last_ref;
@@ -614,7 +616,7 @@ xlog_state_release_iclog(
trace_xlog_iclog_syncing(iclog, _RET_IP_);
spin_unlock(&log->l_icloglock);
- xlog_sync(log, iclog);
+ xlog_sync(log, iclog, ticket);
spin_lock(&log->l_icloglock);
return 0;
}
@@ -881,7 +883,7 @@ xlog_force_iclog(
iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA;
if (iclog->ic_state == XLOG_STATE_ACTIVE)
xlog_state_switch_iclogs(iclog->ic_log, iclog, 0);
- return xlog_state_release_iclog(iclog->ic_log, iclog);
+ return xlog_state_release_iclog(iclog->ic_log, iclog, NULL);
}
/*
@@ -944,6 +946,8 @@ xlog_write_unmount_record(
.lv_niovecs = 1,
.lv_iovecp = &reg,
};
+ LIST_HEAD(lv_chain);
+ list_add(&vec.lv_list, &lv_chain);
BUILD_BUG_ON((sizeof(struct xlog_op_header) +
sizeof(struct xfs_unmount_log_format)) !=
@@ -952,7 +956,7 @@ xlog_write_unmount_record(
/* account for space used by record data */
ticket->t_curr_res -= sizeof(unmount_rec);
- return xlog_write(log, NULL, &vec, ticket, reg.i_len);
+ return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len);
}
/*
@@ -2025,7 +2029,8 @@ xlog_calc_iclog_size(
STATIC void
xlog_sync(
struct xlog *log,
- struct xlog_in_core *iclog)
+ struct xlog_in_core *iclog,
+ struct xlog_ticket *ticket)
{
unsigned int count; /* byte count of bwrite */
unsigned int roundoff; /* roundoff to BB or stripe */
@@ -2037,12 +2042,20 @@ xlog_sync(
count = xlog_calc_iclog_size(log, iclog, &roundoff);
- /* move grant heads by roundoff in sync */
- xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
- xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
+ /*
+ * If we have a ticket, account for the roundoff via the ticket
+ * reservation to avoid touching the hot grant heads needlessly.
+ * Otherwise, we have to move grant heads directly.
+ */
+ if (ticket) {
+ ticket->t_curr_res -= roundoff;
+ } else {
+ xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
+ xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
+ }
/* put cycle number in every block */
- xlog_pack_data(log, iclog, roundoff);
+ xlog_pack_data(log, iclog, roundoff);
/* real byte length */
size = iclog->ic_offset;
@@ -2275,7 +2288,7 @@ xlog_write_get_more_iclog_space(
spin_lock(&log->l_icloglock);
ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, ticket);
spin_unlock(&log->l_icloglock);
if (error)
return error;
@@ -2471,13 +2484,13 @@ int
xlog_write(
struct xlog *log,
struct xfs_cil_ctx *ctx,
- struct xfs_log_vec *log_vector,
+ struct list_head *lv_chain,
struct xlog_ticket *ticket,
uint32_t len)
{
struct xlog_in_core *iclog = NULL;
- struct xfs_log_vec *lv = log_vector;
+ struct xfs_log_vec *lv;
uint32_t record_cnt = 0;
uint32_t data_cnt = 0;
int error = 0;
@@ -2505,7 +2518,7 @@ xlog_write(
if (ctx)
xlog_cil_set_ctx_write_state(ctx, iclog);
- while (lv) {
+ list_for_each_entry(lv, lv_chain, lv_list) {
/*
* If the entire log vec does not fit in the iclog, punt it to
* the partial copy loop which can handle this case.
@@ -2526,7 +2539,6 @@ xlog_write(
xlog_write_full(lv, ticket, iclog, &log_offset,
&len, &record_cnt, &data_cnt);
}
- lv = lv->lv_next;
}
ASSERT(len == 0);
@@ -2538,7 +2550,7 @@ xlog_write(
*/
spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, record_cnt, 0);
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, ticket);
spin_unlock(&log->l_icloglock);
return error;
@@ -2958,7 +2970,7 @@ restart:
* reference to the iclog.
*/
if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, ticket);
spin_unlock(&log->l_icloglock);
if (error)
return error;
@@ -3406,7 +3418,8 @@ xfs_log_ticket_get(
static int
xlog_calc_unit_res(
struct xlog *log,
- int unit_bytes)
+ int unit_bytes,
+ int *niclogs)
{
int iclog_space;
uint num_headers;
@@ -3486,6 +3499,8 @@ xlog_calc_unit_res(
/* roundoff padding for transaction data and one for commit record */
unit_bytes += 2 * log->l_iclog_roundoff;
+ if (niclogs)
+ *niclogs = num_headers;
return unit_bytes;
}
@@ -3494,7 +3509,7 @@ xfs_log_calc_unit_res(
struct xfs_mount *mp,
int unit_bytes)
{
- return xlog_calc_unit_res(mp->m_log, unit_bytes);
+ return xlog_calc_unit_res(mp->m_log, unit_bytes, NULL);
}
/*
@@ -3512,7 +3527,7 @@ xlog_ticket_alloc(
tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL);
- unit_res = xlog_calc_unit_res(log, unit_bytes);
+ unit_res = xlog_calc_unit_res(log, unit_bytes, &tic->t_iclog_hdrs);
atomic_set(&tic->t_ref, 1);
tic->t_task = current;
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index f3ce046a7d45..2728886c2963 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -9,7 +9,8 @@
struct xfs_cil_ctx;
struct xfs_log_vec {
- struct xfs_log_vec *lv_next; /* next lv in build list */
+ struct list_head lv_list; /* CIL lv chain ptrs */
+ uint32_t lv_order_id; /* chain ordering info */
int lv_niovecs; /* number of iovecs in lv */
struct xfs_log_iovec *lv_iovecp; /* iovec array */
struct xfs_log_item *lv_item; /* owner */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index db6cb7800251..eccbfb99e894 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -44,9 +44,20 @@ xlog_cil_ticket_alloc(
* transaction overhead reservation from the first transaction commit.
*/
tic->t_curr_res = 0;
+ tic->t_iclog_hdrs = 0;
return tic;
}
+static inline void
+xlog_cil_set_iclog_hdr_count(struct xfs_cil *cil)
+{
+ struct xlog *log = cil->xc_log;
+
+ atomic_set(&cil->xc_iclog_hdrs,
+ (XLOG_CIL_BLOCKING_SPACE_LIMIT(log) /
+ (log->l_iclog_size - log->l_iclog_hsize)));
+}
+
/*
* Check if the current log item was first committed in this sequence.
* We can't rely on just the log item being in the CIL, we have to check
@@ -61,7 +72,7 @@ xlog_item_in_current_chkpt(
struct xfs_cil *cil,
struct xfs_log_item *lip)
{
- if (list_empty(&lip->li_cil))
+ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags))
return false;
/*
@@ -93,15 +104,88 @@ xlog_cil_ctx_alloc(void)
ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
INIT_LIST_HEAD(&ctx->committing);
INIT_LIST_HEAD(&ctx->busy_extents);
+ INIT_LIST_HEAD(&ctx->log_items);
+ INIT_LIST_HEAD(&ctx->lv_chain);
INIT_WORK(&ctx->push_work, xlog_cil_push_work);
return ctx;
}
+/*
+ * Aggregate the CIL per cpu structures into global counts, lists, etc and
+ * clear the percpu state ready for the next context to use. This is called
+ * from the push code with the context lock held exclusively, hence nothing else
+ * will be accessing or modifying the per-cpu counters.
+ */
+static void
+xlog_cil_push_pcp_aggregate(
+ struct xfs_cil *cil,
+ struct xfs_cil_ctx *ctx)
+{
+ struct xlog_cil_pcp *cilpcp;
+ int cpu;
+
+ for_each_online_cpu(cpu) {
+ cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+
+ ctx->ticket->t_curr_res += cilpcp->space_reserved;
+ cilpcp->space_reserved = 0;
+
+ if (!list_empty(&cilpcp->busy_extents)) {
+ list_splice_init(&cilpcp->busy_extents,
+ &ctx->busy_extents);
+ }
+ if (!list_empty(&cilpcp->log_items))
+ list_splice_init(&cilpcp->log_items, &ctx->log_items);
+
+ /*
+ * We're in the middle of switching cil contexts. Reset the
+ * counter we use to detect when the current context is nearing
+ * full.
+ */
+ cilpcp->space_used = 0;
+ }
+}
+
+/*
+ * Aggregate the CIL per-cpu space used counters into the global atomic value.
+ * This is called when the per-cpu counter aggregation will first pass the soft
+ * limit threshold so we can switch to atomic counter aggregation for accurate
+ * detection of hard limit traversal.
+ */
+static void
+xlog_cil_insert_pcp_aggregate(
+ struct xfs_cil *cil,
+ struct xfs_cil_ctx *ctx)
+{
+ struct xlog_cil_pcp *cilpcp;
+ int cpu;
+ int count = 0;
+
+ /* Trigger atomic updates then aggregate only for the first caller */
+ if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags))
+ return;
+
+ for_each_online_cpu(cpu) {
+ int old, prev;
+
+ cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+ do {
+ old = cilpcp->space_used;
+ prev = cmpxchg(&cilpcp->space_used, old, 0);
+ } while (old != prev);
+ count += old;
+ }
+ atomic_add(count, &ctx->space_used);
+}
+
static void
xlog_cil_ctx_switch(
struct xfs_cil *cil,
struct xfs_cil_ctx *ctx)
{
+ xlog_cil_set_iclog_hdr_count(cil);
+ set_bit(XLOG_CIL_EMPTY, &cil->xc_flags);
+ set_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags);
ctx->sequence = ++cil->xc_current_sequence;
ctx->cil = cil;
cil->xc_ctx = ctx;
@@ -123,6 +207,7 @@ xlog_cil_init_post_recovery(
{
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
+ xlog_cil_set_iclog_hdr_count(log->l_cilp);
}
static inline int
@@ -254,6 +339,7 @@ xlog_cil_alloc_shadow_bufs(
memset(lv, 0, xlog_cil_iovec_space(niovecs));
+ INIT_LIST_HEAD(&lv->lv_list);
lv->lv_item = lip;
lv->lv_size = buf_size;
if (ordered)
@@ -269,7 +355,6 @@ xlog_cil_alloc_shadow_bufs(
else
lv->lv_buf_len = 0;
lv->lv_bytes = 0;
- lv->lv_next = NULL;
}
/* Ensure the lv is set up according to ->iop_size */
@@ -396,7 +481,6 @@ xlog_cil_insert_format_items(
if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
/* same or smaller, optimise common overwrite case */
lv = lip->li_lv;
- lv->lv_next = NULL;
if (ordered)
goto insert;
@@ -434,6 +518,23 @@ insert:
}
/*
+ * The use of lockless waitqueue_active() requires that the caller has
+ * serialised itself against the wakeup call in xlog_cil_push_work(). That
+ * can be done by either holding the push lock or the context lock.
+ */
+static inline bool
+xlog_cil_over_hard_limit(
+ struct xlog *log,
+ int32_t space_used)
+{
+ if (waitqueue_active(&log->l_cilp->xc_push_wait))
+ return true;
+ if (space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
+ return true;
+ return false;
+}
+
+/*
* Insert the log items into the CIL and calculate the difference in space
* consumed by the item. Add the space to the checkpoint ticket and calculate
* if the change requires additional log metadata. If it does, take that space
@@ -450,8 +551,10 @@ xlog_cil_insert_items(
struct xfs_cil_ctx *ctx = cil->xc_ctx;
struct xfs_log_item *lip;
int len = 0;
- int iclog_space;
int iovhdr_res = 0, split_res = 0, ctx_res = 0;
+ int space_used;
+ int order;
+ struct xlog_cil_pcp *cilpcp;
ASSERT(tp);
@@ -461,93 +564,135 @@ xlog_cil_insert_items(
*/
xlog_cil_insert_format_items(log, tp, &len);
- spin_lock(&cil->xc_cil_lock);
+ /*
+ * Subtract the space released by intent cancelation from the space we
+ * consumed so that we remove it from the CIL space and add it back to
+ * the current transaction reservation context.
+ */
+ len -= released_space;
- /* attach the transaction to the CIL if it has any busy extents */
- if (!list_empty(&tp->t_busy))
- list_splice_init(&tp->t_busy, &ctx->busy_extents);
+ /*
+ * Grab the per-cpu pointer for the CIL before we start any accounting.
+ * That ensures that we are running with pre-emption disabled and so we
+ * can't be scheduled away between split sample/update operations that
+ * are done without outside locking to serialise them.
+ */
+ cilpcp = get_cpu_ptr(cil->xc_pcp);
/*
- * Now transfer enough transaction reservation to the context ticket
- * for the checkpoint. The context ticket is special - the unit
- * reservation has to grow as well as the current reservation as we
- * steal from tickets so we can correctly determine the space used
- * during the transaction commit.
+ * We need to take the CIL checkpoint unit reservation on the first
+ * commit into the CIL. Test the XLOG_CIL_EMPTY bit first so we don't
+ * unnecessarily do an atomic op in the fast path here. We can clear the
+ * XLOG_CIL_EMPTY bit as we are under the xc_ctx_lock here and that
+ * needs to be held exclusively to reset the XLOG_CIL_EMPTY bit.
*/
- if (ctx->ticket->t_curr_res == 0) {
+ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) &&
+ test_and_clear_bit(XLOG_CIL_EMPTY, &cil->xc_flags))
ctx_res = ctx->ticket->t_unit_res;
- ctx->ticket->t_curr_res = ctx_res;
- tp->t_ticket->t_curr_res -= ctx_res;
- }
- /* do we need space for more log record headers? */
- iclog_space = log->l_iclog_size - log->l_iclog_hsize;
- if (len > 0 && (ctx->space_used / iclog_space !=
- (ctx->space_used + len) / iclog_space)) {
- split_res = (len + iclog_space - 1) / iclog_space;
- /* need to take into account split region headers, too */
- split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
- ctx->ticket->t_unit_res += split_res;
- ctx->ticket->t_curr_res += split_res;
- tp->t_ticket->t_curr_res -= split_res;
- ASSERT(tp->t_ticket->t_curr_res >= len);
+ /*
+ * Check if we need to steal iclog headers. atomic_read() is not a
+ * locked atomic operation, so we can check the value before we do any
+ * real atomic ops in the fast path. If we've already taken the CIL unit
+ * reservation from this commit, we've already got one iclog header
+ * space reserved so we have to account for that otherwise we risk
+ * overrunning the reservation on this ticket.
+ *
+ * If the CIL is already at the hard limit, we might need more header
+ * space that originally reserved. So steal more header space from every
+ * commit that occurs once we are over the hard limit to ensure the CIL
+ * push won't run out of reservation space.
+ *
+ * This can steal more than we need, but that's OK.
+ *
+ * The cil->xc_ctx_lock provides the serialisation necessary for safely
+ * calling xlog_cil_over_hard_limit() in this context.
+ */
+ space_used = atomic_read(&ctx->space_used) + cilpcp->space_used + len;
+ if (atomic_read(&cil->xc_iclog_hdrs) > 0 ||
+ xlog_cil_over_hard_limit(log, space_used)) {
+ split_res = log->l_iclog_hsize +
+ sizeof(struct xlog_op_header);
+ if (ctx_res)
+ ctx_res += split_res * (tp->t_ticket->t_iclog_hdrs - 1);
+ else
+ ctx_res = split_res * tp->t_ticket->t_iclog_hdrs;
+ atomic_sub(tp->t_ticket->t_iclog_hdrs, &cil->xc_iclog_hdrs);
}
- tp->t_ticket->t_curr_res -= len;
- tp->t_ticket->t_curr_res += released_space;
- ctx->space_used += len;
- ctx->space_used -= released_space;
+ cilpcp->space_reserved += ctx_res;
/*
- * If we've overrun the reservation, dump the tx details before we move
- * the log items. Shutdown is imminent...
+ * Accurately account when over the soft limit, otherwise fold the
+ * percpu count into the global count if over the per-cpu threshold.
*/
- if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
- xfs_warn(log->l_mp, "Transaction log reservation overrun:");
- xfs_warn(log->l_mp,
- " log items: %d bytes (iov hdrs: %d bytes)",
- len, iovhdr_res);
- xfs_warn(log->l_mp, " split region headers: %d bytes",
- split_res);
- xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
- xlog_print_trans(tp);
+ if (!test_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) {
+ atomic_add(len, &ctx->space_used);
+ } else if (cilpcp->space_used + len >
+ (XLOG_CIL_SPACE_LIMIT(log) / num_online_cpus())) {
+ space_used = atomic_add_return(cilpcp->space_used + len,
+ &ctx->space_used);
+ cilpcp->space_used = 0;
+
+ /*
+ * If we just transitioned over the soft limit, we need to
+ * transition to the global atomic counter.
+ */
+ if (space_used >= XLOG_CIL_SPACE_LIMIT(log))
+ xlog_cil_insert_pcp_aggregate(cil, ctx);
+ } else {
+ cilpcp->space_used += len;
}
+ /* attach the transaction to the CIL if it has any busy extents */
+ if (!list_empty(&tp->t_busy))
+ list_splice_init(&tp->t_busy, &cilpcp->busy_extents);
/*
- * Now (re-)position everything modified at the tail of the CIL.
+ * Now update the order of everything modified in the transaction
+ * and insert items into the CIL if they aren't already there.
* We do this here so we only need to take the CIL lock once during
* the transaction commit.
*/
+ order = atomic_inc_return(&ctx->order_id);
list_for_each_entry(lip, &tp->t_items, li_trans) {
-
/* Skip items which aren't dirty in this transaction. */
if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
continue;
- /*
- * Only move the item if it isn't already at the tail. This is
- * to prevent a transient list_empty() state when reinserting
- * an item that is already the only item in the CIL.
- */
- if (!list_is_last(&lip->li_cil, &cil->xc_cil))
- list_move_tail(&lip->li_cil, &cil->xc_cil);
+ lip->li_order_id = order;
+ if (!list_empty(&lip->li_cil))
+ continue;
+ list_add_tail(&lip->li_cil, &cilpcp->log_items);
}
+ put_cpu_ptr(cilpcp);
- spin_unlock(&cil->xc_cil_lock);
-
- if (tp->t_ticket->t_curr_res < 0)
+ /*
+ * If we've overrun the reservation, dump the tx details before we move
+ * the log items. Shutdown is imminent...
+ */
+ tp->t_ticket->t_curr_res -= ctx_res + len;
+ if (WARN_ON(tp->t_ticket->t_curr_res < 0)) {
+ xfs_warn(log->l_mp, "Transaction log reservation overrun:");
+ xfs_warn(log->l_mp,
+ " log items: %d bytes (iov hdrs: %d bytes)",
+ len, iovhdr_res);
+ xfs_warn(log->l_mp, " split region headers: %d bytes",
+ split_res);
+ xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res);
+ xlog_print_trans(tp);
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+ }
}
static void
xlog_cil_free_logvec(
- struct xfs_log_vec *log_vector)
+ struct list_head *lv_chain)
{
struct xfs_log_vec *lv;
- for (lv = log_vector; lv; ) {
- struct xfs_log_vec *next = lv->lv_next;
+ while (!list_empty(lv_chain)) {
+ lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_list);
+ list_del_init(&lv->lv_list);
kmem_free(lv);
- lv = next;
}
}
@@ -647,7 +792,7 @@ xlog_cil_committed(
spin_unlock(&ctx->cil->xc_push_lock);
}
- xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
+ xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain,
ctx->start_lsn, abort);
xfs_extent_busy_sort(&ctx->busy_extents);
@@ -658,7 +803,7 @@ xlog_cil_committed(
list_del(&ctx->committing);
spin_unlock(&ctx->cil->xc_push_lock);
- xlog_cil_free_logvec(ctx->lv_chain);
+ xlog_cil_free_logvec(&ctx->lv_chain);
if (!list_empty(&ctx->busy_extents))
xlog_discard_busy_extents(mp, ctx);
@@ -817,7 +962,6 @@ restart:
static int
xlog_cil_write_chain(
struct xfs_cil_ctx *ctx,
- struct xfs_log_vec *chain,
uint32_t chain_len)
{
struct xlog *log = ctx->cil->xc_log;
@@ -826,7 +970,7 @@ xlog_cil_write_chain(
error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD);
if (error)
return error;
- return xlog_write(log, ctx, chain, ctx->ticket, chain_len);
+ return xlog_write(log, ctx, &ctx->lv_chain, ctx->ticket, chain_len);
}
/*
@@ -855,6 +999,8 @@ xlog_cil_write_commit_record(
.lv_iovecp = &reg,
};
int error;
+ LIST_HEAD(lv_chain);
+ list_add(&vec.lv_list, &lv_chain);
if (xlog_is_shutdown(log))
return -EIO;
@@ -865,7 +1011,7 @@ xlog_cil_write_commit_record(
/* account for space used by record data */
ctx->ticket->t_curr_res -= reg.i_len;
- error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len);
+ error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len);
if (error)
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return error;
@@ -931,12 +1077,31 @@ xlog_cil_build_trans_hdr(
lvhdr->lv_niovecs = 2;
lvhdr->lv_iovecp = &hdr->lhdr[0];
lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len;
- lvhdr->lv_next = ctx->lv_chain;
tic->t_curr_res -= lvhdr->lv_bytes;
}
/*
+ * CIL item reordering compare function. We want to order in ascending ID order,
+ * but we want to leave items with the same ID in the order they were added to
+ * the list. This is important for operations like reflink where we log 4 order
+ * dependent intents in a single transaction when we overwrite an existing
+ * shared extent with a new shared extent. i.e. BUI(unmap), CUI(drop),
+ * CUI (inc), BUI(remap)...
+ */
+static int
+xlog_cil_order_cmp(
+ void *priv,
+ const struct list_head *a,
+ const struct list_head *b)
+{
+ struct xfs_log_vec *l1 = container_of(a, struct xfs_log_vec, lv_list);
+ struct xfs_log_vec *l2 = container_of(b, struct xfs_log_vec, lv_list);
+
+ return l1->lv_order_id > l2->lv_order_id;
+}
+
+/*
* Pull all the log vectors off the items in the CIL, and remove the items from
* the CIL. We don't need the CIL lock here because it's only needed on the
* transaction commit side which is currently locked out by the flush lock.
@@ -947,18 +1112,16 @@ xlog_cil_build_trans_hdr(
*/
static void
xlog_cil_build_lv_chain(
- struct xfs_cil *cil,
struct xfs_cil_ctx *ctx,
struct list_head *whiteouts,
uint32_t *num_iovecs,
uint32_t *num_bytes)
{
- struct xfs_log_vec *lv = NULL;
-
- while (!list_empty(&cil->xc_cil)) {
+ while (!list_empty(&ctx->log_items)) {
struct xfs_log_item *item;
+ struct xfs_log_vec *lv;
- item = list_first_entry(&cil->xc_cil,
+ item = list_first_entry(&ctx->log_items,
struct xfs_log_item, li_cil);
if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) {
@@ -967,18 +1130,18 @@ xlog_cil_build_lv_chain(
continue;
}
- list_del_init(&item->li_cil);
- if (!ctx->lv_chain)
- ctx->lv_chain = item->li_lv;
- else
- lv->lv_next = item->li_lv;
lv = item->li_lv;
- item->li_lv = NULL;
- *num_iovecs += lv->lv_niovecs;
+ lv->lv_order_id = item->li_order_id;
/* we don't write ordered log vectors */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
*num_bytes += lv->lv_bytes;
+ *num_iovecs += lv->lv_niovecs;
+ list_add_tail(&lv->lv_list, &ctx->lv_chain);
+
+ list_del_init(&item->li_cil);
+ item->li_order_id = 0;
+ item->li_lv = NULL;
}
}
@@ -1022,10 +1185,11 @@ xlog_cil_push_work(
int num_bytes = 0;
int error = 0;
struct xlog_cil_trans_hdr thdr;
- struct xfs_log_vec lvhdr = { NULL };
+ struct xfs_log_vec lvhdr = {};
xfs_csn_t push_seq;
bool push_commit_stable;
LIST_HEAD (whiteouts);
+ struct xlog_ticket *ticket;
new_ctx = xlog_cil_ctx_alloc();
new_ctx->ticket = xlog_cil_ticket_alloc(log);
@@ -1049,12 +1213,14 @@ xlog_cil_push_work(
if (waitqueue_active(&cil->xc_push_wait))
wake_up_all(&cil->xc_push_wait);
+ xlog_cil_push_pcp_aggregate(cil, ctx);
+
/*
* Check if we've anything to push. If there is nothing, then we don't
* move on to a new sequence number and so we have to be able to push
* this sequence again later.
*/
- if (list_empty(&cil->xc_cil)) {
+ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) {
cil->xc_push_seq = 0;
spin_unlock(&cil->xc_push_lock);
goto out_skip;
@@ -1094,7 +1260,7 @@ xlog_cil_push_work(
list_add(&ctx->committing, &cil->xc_committing);
spin_unlock(&cil->xc_push_lock);
- xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes);
+ xlog_cil_build_lv_chain(ctx, &whiteouts, &num_iovecs, &num_bytes);
/*
* Switch the contexts so we can drop the context lock and move out
@@ -1127,14 +1293,30 @@ xlog_cil_push_work(
up_write(&cil->xc_ctx_lock);
/*
+ * Sort the log vector chain before we add the transaction headers.
+ * This ensures we always have the transaction headers at the start
+ * of the chain.
+ */
+ list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp);
+
+ /*
* Build a checkpoint transaction header and write it to the log to
* begin the transaction. We need to account for the space used by the
* transaction header here as it is not accounted for in xlog_write().
+ * Add the lvhdr to the head of the lv chain we pass to xlog_write() so
+ * it gets written into the iclog first.
*/
xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs);
num_bytes += lvhdr.lv_bytes;
+ list_add(&lvhdr.lv_list, &ctx->lv_chain);
- error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes);
+ /*
+ * Take the lvhdr back off the lv_chain immediately after calling
+ * xlog_cil_write_chain() as it should not be passed to log IO
+ * completion.
+ */
+ error = xlog_cil_write_chain(ctx, num_bytes);
+ list_del(&lvhdr.lv_list);
if (error)
goto out_abort_free_ticket;
@@ -1142,7 +1324,14 @@ xlog_cil_push_work(
if (error)
goto out_abort_free_ticket;
- xfs_log_ticket_ungrant(log, ctx->ticket);
+ /*
+ * Grab the ticket from the ctx so we can ungrant it after releasing the
+ * commit_iclog. The ctx may be freed by the time we return from
+ * releasing the commit_iclog (i.e. checkpoint has been completed and
+ * callback run) so we can't reference the ctx after the call to
+ * xlog_state_release_iclog().
+ */
+ ticket = ctx->ticket;
/*
* If the checkpoint spans multiple iclogs, wait for all previous iclogs
@@ -1192,12 +1381,14 @@ xlog_cil_push_work(
if (push_commit_stable &&
ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE)
xlog_state_switch_iclogs(log, ctx->commit_iclog, 0);
- xlog_state_release_iclog(log, ctx->commit_iclog);
+ ticket = ctx->ticket;
+ xlog_state_release_iclog(log, ctx->commit_iclog, ticket);
/* Not safe to reference ctx now! */
spin_unlock(&log->l_icloglock);
xlog_cil_cleanup_whiteouts(&whiteouts);
+ xfs_log_ticket_ungrant(log, ticket);
return;
out_skip:
@@ -1207,17 +1398,19 @@ out_skip:
return;
out_abort_free_ticket:
- xfs_log_ticket_ungrant(log, ctx->ticket);
ASSERT(xlog_is_shutdown(log));
xlog_cil_cleanup_whiteouts(&whiteouts);
if (!ctx->commit_iclog) {
+ xfs_log_ticket_ungrant(log, ctx->ticket);
xlog_cil_committed(ctx);
return;
}
spin_lock(&log->l_icloglock);
- xlog_state_release_iclog(log, ctx->commit_iclog);
+ ticket = ctx->ticket;
+ xlog_state_release_iclog(log, ctx->commit_iclog, ticket);
/* Not safe to reference ctx now! */
spin_unlock(&log->l_icloglock);
+ xfs_log_ticket_ungrant(log, ticket);
}
/*
@@ -1232,18 +1425,27 @@ xlog_cil_push_background(
struct xlog *log) __releases(cil->xc_ctx_lock)
{
struct xfs_cil *cil = log->l_cilp;
+ int space_used = atomic_read(&cil->xc_ctx->space_used);
/*
* The cil won't be empty because we are called while holding the
- * context lock so whatever we added to the CIL will still be there
+ * context lock so whatever we added to the CIL will still be there.
*/
- ASSERT(!list_empty(&cil->xc_cil));
+ ASSERT(!test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
/*
- * Don't do a background push if we haven't used up all the
- * space available yet.
+ * We are done if:
+ * - we haven't used up all the space available yet; or
+ * - we've already queued up a push; and
+ * - we're not over the hard limit; and
+ * - nothing has been over the hard limit.
+ *
+ * If so, we don't need to take the push lock as there's nothing to do.
*/
- if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
+ if (space_used < XLOG_CIL_SPACE_LIMIT(log) ||
+ (cil->xc_push_seq == cil->xc_current_sequence &&
+ space_used < XLOG_CIL_BLOCKING_SPACE_LIMIT(log) &&
+ !waitqueue_active(&cil->xc_push_wait))) {
up_read(&cil->xc_ctx_lock);
return;
}
@@ -1270,12 +1472,11 @@ xlog_cil_push_background(
* dipping back down under the hard limit.
*
* The ctx->xc_push_lock provides the serialisation necessary for safely
- * using the lockless waitqueue_active() check in this context.
+ * calling xlog_cil_over_hard_limit() in this context.
*/
- if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) ||
- waitqueue_active(&cil->xc_push_wait)) {
+ if (xlog_cil_over_hard_limit(log, space_used)) {
trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
- ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+ ASSERT(space_used < log->l_logsize);
xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
return;
}
@@ -1334,7 +1535,8 @@ xlog_cil_push_now(
* If the CIL is empty or we've already pushed the sequence then
* there's no more work that we need to do.
*/
- if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
+ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) ||
+ push_seq <= cil->xc_push_seq) {
spin_unlock(&cil->xc_push_lock);
return;
}
@@ -1352,7 +1554,7 @@ xlog_cil_empty(
bool empty = false;
spin_lock(&cil->xc_push_lock);
- if (list_empty(&cil->xc_cil))
+ if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags))
empty = true;
spin_unlock(&cil->xc_push_lock);
return empty;
@@ -1483,7 +1685,7 @@ xlog_cil_flush(
* If the CIL is empty, make sure that any previous checkpoint that may
* still be in an active iclog is pushed to stable storage.
*/
- if (list_empty(&log->l_cilp->xc_cil))
+ if (test_bit(XLOG_CIL_EMPTY, &log->l_cilp->xc_flags))
xfs_log_force(log->l_mp, 0);
}
@@ -1568,7 +1770,7 @@ restart:
* we would have found the context on the committing list.
*/
if (sequence == cil->xc_current_sequence &&
- !list_empty(&cil->xc_cil)) {
+ !test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) {
spin_unlock(&cil->xc_push_lock);
goto restart;
}
@@ -1589,14 +1791,48 @@ out_shutdown:
}
/*
+ * Move dead percpu state to the relevant CIL context structures.
+ *
+ * We have to lock the CIL context here to ensure that nothing is modifying
+ * the percpu state, either addition or removal. Both of these are done under
+ * the CIL context lock, so grabbing that exclusively here will ensure we can
+ * safely drain the cilpcp for the CPU that is dying.
+ */
+void
+xlog_cil_pcp_dead(
+ struct xlog *log,
+ unsigned int cpu)
+{
+ struct xfs_cil *cil = log->l_cilp;
+ struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+ struct xfs_cil_ctx *ctx;
+
+ down_write(&cil->xc_ctx_lock);
+ ctx = cil->xc_ctx;
+ if (ctx->ticket)
+ ctx->ticket->t_curr_res += cilpcp->space_reserved;
+ cilpcp->space_reserved = 0;
+
+ if (!list_empty(&cilpcp->log_items))
+ list_splice_init(&cilpcp->log_items, &ctx->log_items);
+ if (!list_empty(&cilpcp->busy_extents))
+ list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents);
+ atomic_add(cilpcp->space_used, &ctx->space_used);
+ cilpcp->space_used = 0;
+ up_write(&cil->xc_ctx_lock);
+}
+
+/*
* Perform initial CIL structure initialisation.
*/
int
xlog_cil_init(
- struct xlog *log)
+ struct xlog *log)
{
- struct xfs_cil *cil;
- struct xfs_cil_ctx *ctx;
+ struct xfs_cil *cil;
+ struct xfs_cil_ctx *ctx;
+ struct xlog_cil_pcp *cilpcp;
+ int cpu;
cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL);
if (!cil)
@@ -1611,22 +1847,31 @@ xlog_cil_init(
if (!cil->xc_push_wq)
goto out_destroy_cil;
- INIT_LIST_HEAD(&cil->xc_cil);
+ cil->xc_log = log;
+ cil->xc_pcp = alloc_percpu(struct xlog_cil_pcp);
+ if (!cil->xc_pcp)
+ goto out_destroy_wq;
+
+ for_each_possible_cpu(cpu) {
+ cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
+ INIT_LIST_HEAD(&cilpcp->busy_extents);
+ INIT_LIST_HEAD(&cilpcp->log_items);
+ }
+
INIT_LIST_HEAD(&cil->xc_committing);
- spin_lock_init(&cil->xc_cil_lock);
spin_lock_init(&cil->xc_push_lock);
init_waitqueue_head(&cil->xc_push_wait);
init_rwsem(&cil->xc_ctx_lock);
init_waitqueue_head(&cil->xc_start_wait);
init_waitqueue_head(&cil->xc_commit_wait);
- cil->xc_log = log;
log->l_cilp = cil;
ctx = xlog_cil_ctx_alloc();
xlog_cil_ctx_switch(cil, ctx);
-
return 0;
+out_destroy_wq:
+ destroy_workqueue(cil->xc_push_wq);
out_destroy_cil:
kmem_free(cil);
return -ENOMEM;
@@ -1636,14 +1881,17 @@ void
xlog_cil_destroy(
struct xlog *log)
{
- if (log->l_cilp->xc_ctx) {
- if (log->l_cilp->xc_ctx->ticket)
- xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
- kmem_free(log->l_cilp->xc_ctx);
+ struct xfs_cil *cil = log->l_cilp;
+
+ if (cil->xc_ctx) {
+ if (cil->xc_ctx->ticket)
+ xfs_log_ticket_put(cil->xc_ctx->ticket);
+ kmem_free(cil->xc_ctx);
}
- ASSERT(list_empty(&log->l_cilp->xc_cil));
- destroy_workqueue(log->l_cilp->xc_push_wq);
- kmem_free(log->l_cilp);
+ ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
+ free_percpu(cil->xc_pcp);
+ destroy_workqueue(cil->xc_push_wq);
+ kmem_free(cil);
}
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 686c01eb3661..1bd2963e8fbd 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -143,15 +143,16 @@ enum xlog_iclog_state {
#define XLOG_COVER_OPS 5
typedef struct xlog_ticket {
- struct list_head t_queue; /* reserve/write queue */
- struct task_struct *t_task; /* task that owns this ticket */
- xlog_tid_t t_tid; /* transaction identifier : 4 */
- atomic_t t_ref; /* ticket reference count : 4 */
- int t_curr_res; /* current reservation in bytes : 4 */
- int t_unit_res; /* unit reservation in bytes : 4 */
- char t_ocnt; /* original count : 1 */
- char t_cnt; /* current count : 1 */
- uint8_t t_flags; /* properties of reservation : 1 */
+ struct list_head t_queue; /* reserve/write queue */
+ struct task_struct *t_task; /* task that owns this ticket */
+ xlog_tid_t t_tid; /* transaction identifier */
+ atomic_t t_ref; /* ticket reference count */
+ int t_curr_res; /* current reservation */
+ int t_unit_res; /* unit reservation */
+ char t_ocnt; /* original unit count */
+ char t_cnt; /* current unit count */
+ uint8_t t_flags; /* properties of reservation */
+ int t_iclog_hdrs; /* iclog hdrs in t_curr_res */
} xlog_ticket_t;
/*
@@ -221,13 +222,25 @@ struct xfs_cil_ctx {
xfs_lsn_t commit_lsn; /* chkpt commit record lsn */
struct xlog_in_core *commit_iclog;
struct xlog_ticket *ticket; /* chkpt ticket */
- int space_used; /* aggregate size of regions */
+ atomic_t space_used; /* aggregate size of regions */
struct list_head busy_extents; /* busy extents in chkpt */
- struct xfs_log_vec *lv_chain; /* logvecs being pushed */
+ struct list_head log_items; /* log items in chkpt */
+ struct list_head lv_chain; /* logvecs being pushed */
struct list_head iclog_entry;
struct list_head committing; /* ctx committing list */
struct work_struct discard_endio_work;
struct work_struct push_work;
+ atomic_t order_id;
+};
+
+/*
+ * Per-cpu CIL tracking items
+ */
+struct xlog_cil_pcp {
+ int32_t space_used;
+ uint32_t space_reserved;
+ struct list_head busy_extents;
+ struct list_head log_items;
};
/*
@@ -248,8 +261,8 @@ struct xfs_cil_ctx {
*/
struct xfs_cil {
struct xlog *xc_log;
- struct list_head xc_cil;
- spinlock_t xc_cil_lock;
+ unsigned long xc_flags;
+ atomic_t xc_iclog_hdrs;
struct workqueue_struct *xc_push_wq;
struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp;
@@ -263,8 +276,17 @@ struct xfs_cil {
wait_queue_head_t xc_start_wait;
xfs_csn_t xc_current_sequence;
wait_queue_head_t xc_push_wait; /* background push throttle */
+
+ void __percpu *xc_pcp; /* percpu CIL structures */
+#ifdef CONFIG_HOTPLUG_CPU
+ struct list_head xc_pcp_list;
+#endif
} ____cacheline_aligned_in_smp;
+/* xc_flags bit values */
+#define XLOG_CIL_EMPTY 1
+#define XLOG_CIL_PCP_SPACE 2
+
/*
* The amount of log space we allow the CIL to aggregate is difficult to size.
* Whatever we choose, we have to make sure we can get a reservation for the
@@ -486,14 +508,15 @@ struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes,
void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
void xlog_print_trans(struct xfs_trans *);
int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx,
- struct xfs_log_vec *log_vector, struct xlog_ticket *tic,
+ struct list_head *lv_chain, struct xlog_ticket *tic,
uint32_t len);
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog,
int eventual_size);
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+ struct xlog_ticket *ticket);
/*
* When we crack an atomic LSN, we sample it first so that the value will not
@@ -682,4 +705,9 @@ xlog_kvmalloc(
return p;
}
+/*
+ * CIL CPU dead notifier
+ */
+void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu);
+
#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 315cca5f9322..4edee1d3784a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2223,6 +2223,7 @@ xfs_cpu_dead(
list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
spin_unlock(&xfs_mount_list_lock);
xfs_inodegc_cpu_dead(mp, cpu);
+ xlog_cil_pcp_dead(mp->m_log, cpu);
spin_lock(&xfs_mount_list_lock);
}
spin_unlock(&xfs_mount_list_lock);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c49a61a9757d..7bd16fbff534 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -760,7 +760,7 @@ xfs_log_item_batch_insert(
void
xfs_trans_committed_bulk(
struct xfs_ail *ailp,
- struct xfs_log_vec *log_vector,
+ struct list_head *lv_chain,
xfs_lsn_t commit_lsn,
bool aborted)
{
@@ -775,7 +775,7 @@ xfs_trans_committed_bulk(
spin_unlock(&ailp->ail_lock);
/* unpin all the log items */
- for (lv = log_vector; lv; lv = lv->lv_next ) {
+ list_for_each_entry(lv, lv_chain, lv_list) {
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64062e3b788b..55819785941c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -45,6 +45,7 @@ struct xfs_log_item {
struct xfs_log_vec *li_lv; /* active log vector */
struct xfs_log_vec *li_lv_shadow; /* standby vector */
xfs_csn_t li_seq; /* CIL commit seq */
+ uint32_t li_order_id; /* CIL commit order */
};
/*
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index f0d79a9050ba..d5400150358e 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -19,7 +19,8 @@ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
-void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
+void xfs_trans_committed_bulk(struct xfs_ail *ailp,
+ struct list_head *lv_chain,
xfs_lsn_t commit_lsn, bool aborted);
/*
* AIL traversal cursor.